App Ecosystem Tracker collector facts

Publisher: catherine (@catherine).

Version: 1. Last updated: 2026-07-02T13:09:30.336Z.

Run this collector on demand, as an API endpoint, or on a schedule with Firecrawl Prometheus.

Sample fields: apps, notes, app_id, reviews, app_name, ecosystem, review_text, review_title, tracking_key, review_author, review_rating, review_version.

Parameters: ecosystems (string, required), query (string, required), seed-urls (string, required), max-apps (number, required), output-mode (string, required), category (string), region (string), language (string), include-reviews (boolean), max-reviews-per-app (number), include-pricing (boolean), include-related-apps (boolean), snapshot-label (string), sort-hint (string).

App Ecosystem Tracker

v1 · Published

Tracks marketplace app listings across Chrome Web Store, Shopify App Store, and Stripe App Marketplace with current listing metadata, pricing, public metrics, and visible review samples.

Output & API

Preview the latest data, download it, or call this collector as an API.

Author's sample data
apps
collected_at: 2026-07-02T11:55:34.497Z
snapshot_label: 2026-07-02T11:55:34.497Z
Parameters
--ecosystems (string, required): Comma-separated ecosystems to track; allowed values are chrome, shopify, and stripe. e.g. "shopify,chrome,stripe"
--query (string, required): Keyword, use case, category phrase, or competitive theme used for official marketplace discovery; may be blank only when seed URLs are provided. e.g. "subscription management"
--seed-urls (string, required): Comma-separated official listing, developer, search, category, or marketplace URLs to prioritize; may be blank when a query is provided. e.g. ""
--max-apps (number, required): Maximum number of unique apps to return across all requested ecosystems. e.g. 6
--output-mode (string, required): Output layout: app_rows for one app list, or grouped_by_ecosystem for ecosystem groups. e.g. "app_rows"
--category (string): Optional category filter or phrase included in discovery searches. Default: ""
--region (string): Country or market hint included in discovery when relevant. Default: "global"
--language (string): Preferred content language hint included in discovery. Default: "English"
--include-reviews (boolean): Whether to include visible review samples when accessible. Default: true
--max-reviews-per-app (number): Maximum number of visible review rows to capture per app. Default: 20
--include-pricing (boolean): Whether to extract visible pricing text and normalized pricing fields. Default: true
--include-related-apps (boolean): Whether to expand to clearly related official marketplace apps shown on scraped listing pages. Default: false
--snapshot-label (string): Caller-supplied tracking label; blank uses the current run timestamp. Default: ""
--sort-hint (string): Discovery hint such as best match, most reviewed, newest, or top rated. Default: "best match"

Marketplace

Publish this collector so others can deploy it — you keep ownership.

0 subscribers
catherine (@catherine)
0 runs in 14d · published 3d ago

Versions

Every build and self-heal appends a version. Pin one to lock runs to it.

managed by author
v1 · built · approved · current · 3d ago
How this script collects data
import Firecrawl from "@mendable/firecrawl-js";
import { parseArgs } from "node:util";

/** Marketplaces this collector can scrape; also the set of values accepted by --ecosystems. */
type Ecosystem = "chrome" | "shopify" | "stripe";
/** Layout of the JSON written to stdout: one flat app list, or apps grouped per ecosystem. */
type OutputMode = "app_rows" | "grouped_by_ecosystem";

/**
 * One visible review captured from a listing page.
 *
 * `tracking_key`, `ecosystem` and `app_name` are copied from the parent app row so a
 * review can be joined back to its app. Fields the page does not expose stay `null`;
 * the only review parser in this file (Shopify) always leaves `review_rating`,
 * `review_title` and `review_version` as `null`.
 */
type ReviewRow = {
  tracking_key: string;
  ecosystem: Ecosystem;
  app_name: string | null;
  // Page the review is attributed to (the Shopify parser uses `<listing_url>/reviews`).
  review_source_url: string;
  review_author: string | null;
  review_rating: number | null;
  review_title: string | null;
  review_text: string | null;
  // Date exactly as shown on the page, plus a YYYY-MM-DD form when it could be parsed.
  review_date_text: string | null;
  review_date_normalized: string | null;
  review_version: string | null;
  developer_reply_text: string | null;
  developer_reply_date_text: string | null;
};

/**
 * One marketplace app listing as emitted in the collector output.
 *
 * Every row starts from `baseRow()` with all optional facts set to `null`; the
 * per-ecosystem parsers then fill in whatever the scraped page exposes.
 */
type AppRow = {
  // --- Identity and snapshot bookkeeping ---
  ecosystem: Ecosystem;
  // `<ecosystem>:<app id>` (or a slugified path when no id can be derived).
  tracking_key: string;
  // Caller-supplied label, or the run timestamp when the flag is blank.
  snapshot_label: string;
  // ISO timestamp taken once at process start; identical for every row of a run.
  collected_at: string;
  app_name: string | null;
  app_id: string | null;
  listing_url: string;
  // --- Developer and policy links ---
  developer_name: string | null;
  developer_url: string | null;
  developer_website: string | null;
  support_url: string | null;
  privacy_url: string | null;
  // --- Classification and descriptive text ---
  category: string | null;
  subcategory: string | null;
  query_match_reason: string | null;
  tagline: string | null;
  description_short: string | null;
  // --- Pricing (only populated when --include-pricing is true) ---
  pricing_text_raw: string | null;
  pricing_model_normalized: string | null;
  starting_price: number | null;
  billing_period: string | null;
  free_trial_available: boolean | null;
  free_plan_available: boolean | null;
  // --- Public metrics; each numeric value has its raw source text alongside ---
  rating_value: number | null;
  review_count: number | null;
  review_count_raw_text: string | null;
  install_or_user_count: number | null;
  install_or_user_count_raw_text: string | null;
  // No parser in this file assigns the two rank fields; they stay null.
  popularity_rank: number | null;
  rank_context: string | null;
  // --- Versioning / freshness ---
  last_updated_text: string | null;
  last_updated_date_normalized: string | null;
  version_text: string | null;
  permissions_or_scopes: string[] | null;
  screenshots_count: number | null;
  // --- Provenance ---
  // Set to the listing URL by baseRow().
  source_page_url: string;
  // De-duplicated developer/support/privacy links found on the listing.
  secondary_source_urls: string[];
  confidence: "high" | "medium" | "low";
  notes: string | null;
  // Present only when --include-reviews is true (possibly an empty array).
  reviews?: ReviewRow[];
};

// The Firecrawl API key comes from the environment; without it nothing can be
// scraped, so the script exits with status 1 before doing any work.
const apiKey = process.env.FIRECRAWL_API_KEY;
if (!apiKey) {
  console.error("FIRECRAWL_API_KEY is not set");
  process.exit(1);
}

// Single shared client used for every scrape and search call in this file.
const firecrawl = new Firecrawl({ apiKey });

// Command-line flags. Every option is declared as a string, including the
// numeric and boolean ones; they are converted further down. `strict: true`
// makes parseArgs reject flags that are not listed here.
const { values: flags } = parseArgs({
  strict: true,
  options: {
    // Required flags (enforced by requiredFlag, not by parseArgs).
    ecosystems: { type: "string" },
    query: { type: "string" },
    "seed-urls": { type: "string" },
    "max-apps": { type: "string" },
    "output-mode": { type: "string" },
    // Optional flags; defaults are applied where each value is read.
    category: { type: "string" },
    region: { type: "string" },
    language: { type: "string" },
    "include-reviews": { type: "string" },
    "max-reviews-per-app": { type: "string" },
    "include-pricing": { type: "string" },
    "include-related-apps": { type: "string" },
    "snapshot-label": { type: "string" },
    "sort-hint": { type: "string" },
  },
});

/**
 * Fetches a mandatory command-line flag.
 *
 * A flag that was not passed at all is fatal: the problem is reported on stderr
 * and the process exits with status 1. A flag passed with an empty value is
 * returned as the empty string.
 *
 * @param name Flag name without the leading dashes.
 * @returns The raw flag value exactly as supplied.
 */
function requiredFlag(name: string): string {
  const supplied = flags[name] as string | undefined;
  const isMissing = supplied === undefined;
  if (isMissing) {
    console.error(`--${name} is required`);
    process.exit(1);
  }
  return supplied;
}

// Timestamp taken once at start-up; stamped on every row and used as the
// default snapshot label.
const collectedAt = new Date().toISOString();

/**
 * Reads an optional string flag.
 *
 * @param name Flag name without the leading dashes.
 * @param fallback Value used when the flag is absent or blank after trimming.
 * @returns The trimmed flag value, or `fallback`.
 */
function optionalFlag(name: string, fallback: string): string {
  const raw = (flags[name] as string | undefined) ?? fallback;
  return raw.trim() || fallback;
}

/**
 * Reads an optional boolean flag. Only the word "true" enables the flag, but it
 * is matched case-insensitively so "TRUE"/"True" are not silently read as false.
 *
 * @param name Flag name without the leading dashes.
 * @param fallback Value used when the flag is absent or blank.
 */
function booleanFlag(name: string, fallback: boolean): boolean {
  return optionalFlag(name, String(fallback)).toLowerCase() === "true";
}

// Required flags. Blank values are allowed here and validated further down.
const requestedEcosystems = requiredFlag("ecosystems")
  .split(",")
  .map((part) => part.trim().toLowerCase())
  .filter(Boolean);
const query = requiredFlag("query").trim();
const seedUrlsRaw = requiredFlag("seed-urls").trim();
const maxApps = Number(requiredFlag("max-apps"));
const outputMode = requiredFlag("output-mode") as OutputMode;

// Optional flags with their documented defaults.
const category = optionalFlag("category", "");
const region = optionalFlag("region", "global");
const language = optionalFlag("language", "English");
const includeReviews = booleanFlag("include-reviews", true);
const maxReviewsPerApp = Number(optionalFlag("max-reviews-per-app", "20"));
const includePricing = booleanFlag("include-pricing", true);
const includeRelatedApps = booleanFlag("include-related-apps", false);
// A blank label falls back to the run timestamp.
const snapshotLabel = optionalFlag("snapshot-label", "") || collectedAt;
const sortHint = optionalFlag("sort-hint", "best match");

// Input validation. Every failure throws with an OUT_OF_SCOPE prefix.
const allowedEcosystems = new Set<string>(["chrome", "shopify", "stripe"]);
// Anything outside the supported set makes the whole request invalid.
const unsupportedEcosystems = requestedEcosystems.filter((ecosystem) => !allowedEcosystems.has(ecosystem));
// De-duplicate while keeping first-seen order, so "chrome,chrome" does not run
// the same searches twice or emit the same ecosystem group twice.
const ecosystems = Array.from(new Set(requestedEcosystems)).filter((ecosystem): ecosystem is Ecosystem =>
  allowedEcosystems.has(ecosystem),
);
if (ecosystems.length === 0) {
  throw new Error("OUT_OF_SCOPE: at least one supported ecosystem is required: chrome, shopify, or stripe");
}
if (unsupportedEcosystems.length > 0) {
  throw new Error("OUT_OF_SCOPE: ecosystems must be comma-separated values from chrome, shopify, stripe");
}
// Discovery needs something to start from: a search query, seed URLs, or both.
if (!query && !seedUrlsRaw) {
  throw new Error("OUT_OF_SCOPE: provide at least one of --query or --seed-urls");
}
if (!Number.isFinite(maxApps) || maxApps < 1) {
  throw new Error("OUT_OF_SCOPE: --max-apps must be a positive number");
}
if (outputMode !== "app_rows" && outputMode !== "grouped_by_ecosystem") {
  throw new Error("OUT_OF_SCOPE: --output-mode must be app_rows or grouped_by_ecosystem");
}
if (!Number.isFinite(maxReviewsPerApp) || maxReviewsPerApp < 0) {
  throw new Error("OUT_OF_SCOPE: --max-reviews-per-app must be zero or a positive number");
}

// Seed URLs supplied by the caller. Each one is canonicalized, but its query
// string is kept: search-style seeds (for example `/search?q=...`) are
// meaningless without it. Listing seeds are canonicalized again (query
// removed) when discovery adds them to the result set, so de-duplication
// still works.
const seedUrls = seedUrlsRaw
  .split(",")
  .map((rawSeed) => {
    const trimmed = rawSeed.trim();
    const canonical = canonicalizeUrl(trimmed);
    if (!canonical) return "";
    try {
      const originalQuery = new URL(trimmed).search;
      if (!originalQuery) return canonical;
      const withQuery = new URL(canonical);
      withQuery.search = originalQuery;
      return withQuery.toString();
    } catch {
      // The canonical form is still usable even if the raw seed cannot be re-parsed.
      return canonical;
    }
  })
  .filter(Boolean);

/**
 * Tidies a fragment of scraped text.
 *
 * Un-escapes markdown `\*`, collapses every whitespace run to one space, removes
 * whitespace in front of `, . ! ? ; :` and trims the ends.
 *
 * @returns The tidied text, or `null` when the input is null/undefined/empty or
 *          nothing remains after tidying.
 */
function clean(value: string | null | undefined): string | null {
  if (value === null || value === undefined || value === "") return null;
  const unescaped = value.replace(/\\\*/g, "*");
  const singleSpaced = unescaped.replace(/\s+/g, " ");
  const tightPunctuation = singleSpaced.replace(/\s+([,.!?;:])/g, "$1");
  const result = tightPunctuation.trim();
  return result === "" ? null : result;
}

/**
 * Breaks markdown into cleaned, non-empty lines.
 *
 * A leading list bullet (`- ` or `* `) is removed from each line before it is
 * passed through `clean`; lines that clean to nothing are dropped.
 */
function lines(markdown: string): string[] {
  const kept: string[] = [];
  for (const rawLine of markdown.split("\n")) {
    const withoutBullet = rawLine.replace(/^[-*]\s+/, "");
    const tidy = clean(withoutBullet);
    if (tidy) kept.push(tidy);
  }
  return kept;
}

/**
 * Normalizes a URL for comparison and de-duplication: the fragment and query
 * string are removed and trailing slashes are trimmed from the path.
 *
 * Relative references are rejected. They are detected by resolving against a
 * placeholder base (`https://example.com`) and checking whether the result
 * still points at that placeholder host without the input carrying its own
 * `http://` / `https://` prefix. The prefix check is case-insensitive and
 * requires the `://`, so `HTTPS://example.com/x` is accepted while a relative
 * string such as `httpfoo` is not mistaken for an absolute URL.
 *
 * @param url Candidate URL; expected to be trimmed by the caller.
 * @returns The canonical URL, or `""` when the input is empty, relative, or unparseable.
 */
function canonicalizeUrl(url: string): string {
  if (!url) return "";
  try {
    const parsed = new URL(url, "https://example.com");
    const hasExplicitHttpScheme = /^https?:\/\//i.test(url);
    if (parsed.hostname === "example.com" && !hasExplicitHttpScheme) return "";
    parsed.hash = "";
    parsed.search = "";
    parsed.pathname = parsed.pathname.replace(/\/+$/, "");
    return parsed.toString();
  } catch {
    return "";
  }
}

/**
 * Returns the hostname of an absolute URL with a leading `www.` removed,
 * or `""` when the URL cannot be parsed.
 */
function urlHost(url: string): string {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return "";
  }
  const host = parsed.hostname;
  return host.startsWith("www.") ? host.slice("www.".length) : host;
}

/**
 * Returns the path component of an absolute URL, or `""` when the URL cannot
 * be parsed.
 */
function urlPath(url: string): string {
  let pathname = "";
  try {
    pathname = new URL(url).pathname;
  } catch {
    // Unparseable input: fall through with the empty path.
  }
  return pathname;
}

/**
 * Identifies the marketplace a URL belongs to from its hostname (ignoring a
 * leading `www.`). Returns `null` for any host other than the three official
 * marketplace hosts.
 */
function ecosystemForUrl(url: string): Ecosystem | null {
  const marketplaceHosts = new Map<string, Ecosystem>([
    ["chromewebstore.google.com", "chrome"],
    ["apps.shopify.com", "shopify"],
    ["marketplace.stripe.com", "stripe"],
  ]);
  return marketplaceHosts.get(urlHost(url)) ?? null;
}

/**
 * Decides whether a URL's path looks like an individual app listing in the
 * given ecosystem. Only the path is inspected; the host is not checked here.
 *
 * - chrome: `/detail/<slug>/<32-letter id>` or the slug-less `/detail/<id>`
 *   form (ids use the letters a-p).
 * - shopify: exactly one path segment that is not a reserved section name.
 *   Requiring a single segment keeps sub-pages such as `/<app>/reviews` from
 *   being counted as separate listings; this matches `appIdFromUrl`, which
 *   uses the first segment as the app id.
 * - stripe: `/apps/<slug>`.
 */
function isListingUrl(url: string, ecosystem: Ecosystem): boolean {
  const path = urlPath(url);
  if (ecosystem === "chrome") return /\/detail\/(?:[^/]+\/)?[a-p]{32}$/i.test(path);
  if (ecosystem === "shopify") {
    const reservedSections = ["categories", "partners", "stories", "search", "login", "extensions"];
    const segments = path.split("/").filter(Boolean);
    return segments.length === 1 && !reservedSections.includes(segments[0]);
  }
  return ecosystem === "stripe" && /^\/apps\/[^/]+$/i.test(path);
}

/**
 * Derives the marketplace-specific app id from a listing URL's path:
 * the last segment for chrome, the first for shopify, the second for stripe.
 * Returns `null` when that segment does not exist.
 */
function appIdFromUrl(url: string, ecosystem: Ecosystem): string | null {
  const segments = urlPath(url)
    .split("/")
    .filter((segment) => segment.length > 0);
  switch (ecosystem) {
    case "chrome":
      return segments[segments.length - 1] ?? null;
    case "shopify":
      return segments[0] ?? null;
    case "stripe":
      return segments[1] ?? null;
    default:
      return null;
  }
}

/**
 * Builds the `<ecosystem>:<id>` key used to follow an app across snapshots.
 * When no app id can be derived, the lower-cased URL path with every run of
 * non-alphanumeric characters replaced by `-` is used instead.
 */
function trackingKey(url: string, ecosystem: Ecosystem): string {
  const appId = appIdFromUrl(url, ecosystem);
  const suffix = appId ?? urlPath(url).replace(/[^a-z0-9]+/gi, "-").toLowerCase();
  return `${ecosystem}:${suffix}`;
}

/**
 * Extracts the first number from display text such as "2,000,000 users",
 * "1.2K ratings" or "3 million".
 *
 * Thousands separators are ignored. A magnitude suffix (K/M/B, or the words
 * thousand/million/billion) is applied only when it is a whole token, so the
 * first letter of a following word ("5 Best apps", "3 more") is not mistaken
 * for a multiplier.
 *
 * @returns The rounded integer value, or `null` when the text is empty or has no digits.
 */
function parseNumber(value: string | null): number | null {
  if (!value) return null;
  const match = value.replace(/,/g, "").match(/(\d+(?:\.\d+)?)\s*((?:thousand|million|billion|[KMB])\b)?/i);
  if (!match) return null;
  // The first letter of the suffix determines the magnitude.
  const suffixInitial = match[2]?.charAt(0).toUpperCase();
  const multiplier =
    suffixInitial === "K" || suffixInitial === "T"
      ? 1_000
      : suffixInitial === "M"
        ? 1_000_000
        : suffixInitial === "B"
          ? 1_000_000_000
          : 1;
  return Math.round(Number(match[1]) * multiplier);
}

/**
 * Pulls the first dollar amount and the first billing period out of pricing text.
 *
 * Amounts with thousands separators ("$1,299") are read in full; a comma that
 * is not followed by exactly three digits is treated as punctuation
 * ("$5, then ..." yields 5).
 *
 * @returns `starting_price` as a number and `billing_period` as one of
 *          month/year/user/seat/transaction (lower-cased); each is `null` when not found.
 */
function parsePrice(text: string | null): { starting_price: number | null; billing_period: string | null } {
  if (!text) return { starting_price: null, billing_period: null };
  const price = text.match(/\$((?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?)/);
  const period = text.match(/\/\s*(month|year|user|seat|transaction)|per\s+(month|year|user|seat|transaction)/i);
  return {
    starting_price: price ? Number(price[1].replace(/,/g, "")) : null,
    billing_period: period ? (period[1] ?? period[2]).toLowerCase() : null,
  };
}

/**
 * Maps free-form pricing text onto a coarse pricing-model label.
 *
 * Rules are evaluated in order against the lower-cased text and the first hit
 * wins, so the more specific phrases ("free plan", "free to install") take
 * precedence over the generic "free" and "$" checks.
 *
 * @returns A label such as "freemium" or "paid", or `null` when nothing matches
 *          or the text is empty.
 */
function normalizePricing(text: string | null): string | null {
  if (!text) return null;
  const haystack = text.toLowerCase();
  const mentions = (needle: string): boolean => haystack.includes(needle);
  const hasDollar = mentions("$");
  const orderedRules: Array<[boolean, string]> = [
    [mentions("free plan") && hasDollar, "freemium"],
    [mentions("free plan"), "free_plan_available"],
    [mentions("free to install"), "free_to_install"],
    [mentions("free") && !hasDollar, "free"],
    [mentions("paid subscription"), "paid_subscription"],
    [mentions("in-app purchase"), "in_app_purchases"],
    [hasDollar, "paid"],
    [mentions("contact"), "contact_sales"],
  ];
  for (const [matched, label] of orderedRules) {
    if (matched) return label;
  }
  return null;
}

/**
 * Converts a human-readable date ("June 5, 2024") into `YYYY-MM-DD`.
 *
 * Non-ISO date strings are parsed as local midnight, so formatting the result
 * in UTC would move the date back one day on machines east of UTC. The
 * calendar date is therefore read back with the local-time accessors. Text
 * that already begins with an ISO `YYYY-MM-DD` date keeps that written date.
 *
 * @returns The normalized date, or `null` when the text is empty or unparseable.
 */
function normalizeDate(text: string | null): string | null {
  if (!text) return null;
  const parsed = Date.parse(text);
  if (!Number.isFinite(parsed)) return null;
  const isoPrefix = text.trim().match(/^(\d{4}-\d{2}-\d{2})(?:$|[T\s])/);
  if (isoPrefix) return isoPrefix[1];
  const date = new Date(parsed);
  const year = String(date.getFullYear()).padStart(4, "0");
  const month = String(date.getMonth() + 1).padStart(2, "0");
  const day = String(date.getDate()).padStart(2, "0");
  return `${year}-${month}-${day}`;
}

/**
 * Returns the cleaned text of the first level-1 markdown heading (`# Title`),
 * or `null` when the document has none.
 */
function firstHeading(markdown: string): string | null {
  const headingMatch = /^#\s+(.+)$/m.exec(markdown);
  const headingText = headingMatch ? headingMatch[1] : null;
  return clean(headingText);
}

/**
 * Cuts one section out of a markdown document.
 *
 * The section begins at the first level 1-3 heading whose text starts with
 * `heading` (case-insensitive, whole word) and runs until the nearest
 * following heading that starts with any of `stopHeadings`, or to the end of
 * the document when none follows.
 *
 * @returns The trimmed section text, including its own heading line, or `null`
 *          when the heading is not present.
 */
function section(markdown: string, heading: string, stopHeadings: string[]): string | null {
  const headingPattern = (title: string): RegExp => {
    const literal = title.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    return new RegExp("^#{1,3}\\s+" + literal + "\\b.*$", "im");
  };

  const startAt = markdown.search(headingPattern(heading));
  if (startAt < 0) return null;

  const fromHeading = markdown.slice(startAt);
  // Searching from the second character onward prevents the opening heading
  // from matching as its own stop heading.
  const afterFirstChar = fromHeading.slice(1);
  const cutPoints = stopHeadings
    .map((stop) => afterFirstChar.search(headingPattern(stop)))
    .filter((offset) => offset >= 0)
    .map((offset) => offset + 1);
  const endAt = Math.min(fromHeading.length, ...cutPoints);
  return fromHeading.slice(0, endAt).trim();
}

/**
 * Finds the first line equal to `label` (ignoring case) and returns the line
 * that follows it. Returns `null` when the label is absent or is the last line.
 */
function lineAfter(markdownLines: string[], label: string): string | null {
  const wanted = label.toLowerCase();
  for (let position = 0; position < markdownLines.length; position += 1) {
    if (markdownLines[position].toLowerCase() === wanted) {
      return markdownLines[position + 1] ?? null;
    }
  }
  return null;
}

/**
 * Looks up the first markdown link whose label is exactly `text` (ignoring
 * case) and returns its target in canonical form.
 *
 * @returns The canonical URL, `""` if the target cannot be canonicalized, or
 *          `null` when no such link exists.
 */
function linkByText(markdown: string, text: string): string | null {
  const literalLabel = text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const linkPattern = new RegExp("\\[" + literalLabel + "\\]\\(([^)]+)\\)", "i");
  const found = linkPattern.exec(markdown);
  if (found === null) return null;
  const target = found[1];
  return target ? canonicalizeUrl(target) : null;
}

/**
 * Collects the targets of all markdown links that point at absolute http(s)
 * URLs, in document order and in canonical form. Targets that cannot be
 * canonicalized are skipped; duplicates are kept.
 */
function extractLinks(markdown: string): string[] {
  const linkPattern = /\[[^\]]+\]\((https?:\/\/[^)]+)\)/g;
  return Array.from(markdown.matchAll(linkPattern), (hit) => canonicalizeUrl(hit[1])).filter(
    (canonical) => canonical !== "",
  );
}

/**
 * Counts markdown images whose alt text mentions "screenshot", "media" or
 * "feature" (case-insensitive).
 *
 * @returns The number of such images, or `null` when there are none.
 */
function countScreenshots(markdown: string): number | null {
  const galleryImage = /!\[[^\]]*(screenshot|media|feature)[^\]]*\]/gi;
  let total = 0;
  while (galleryImage.exec(markdown) !== null) {
    total += 1;
  }
  return total === 0 ? null : total;
}

/**
 * Scrapes a page through Firecrawl and returns its main-content markdown and
 * page metadata.
 *
 * Up to two attempts are made, back to back. A response whose markdown is
 * empty or whitespace-only counts as a failed attempt.
 *
 * @throws Error carrying the last failure's message when both attempts fail.
 */
async function scrapeMarkdown(url: string): Promise<{ markdown: string; metadata: Record<string, string> }> {
  const maxAttempts = 2;
  let failure: unknown = null;
  let attemptsMade = 0;
  while (attemptsMade < maxAttempts) {
    attemptsMade += 1;
    try {
      const doc = await firecrawl.scrape(url, {
        formats: ["markdown"],
        onlyMainContent: true,
        integration: "prometheus",
      });
      // The payload is read from the top level first, then from a nested `data` object.
      const markdown = (doc.markdown ?? doc.data?.markdown ?? "") as string;
      const metadata = (doc.metadata ?? doc.data?.metadata ?? {}) as Record<string, string>;
      if (markdown.trim()) {
        return { markdown, metadata };
      }
      failure = new Error("empty markdown");
    } catch (error) {
      failure = error;
    }
  }
  const reason = failure instanceof Error ? failure.message : String(failure);
  throw new Error(`listing scrape failed for ${url}: ${reason}`);
}

/**
 * Runs a site-restricted Firecrawl search for the configured query and returns
 * the canonical listing URLs found for one ecosystem.
 *
 * The search text combines the query with the optional category, region
 * (unless it is "global"), language and sort hint. Three results are requested
 * per app still needed, clamped to the range 3-10.
 *
 * @param remaining Number of apps still wanted; only affects the result limit.
 * @returns Listing URLs in search order; empty when no query was supplied.
 */
async function discoverFromSearch(ecosystem: Ecosystem, remaining: number): Promise<string[]> {
  if (!query) return [];
  const hints = [category, region !== "global" ? region : "", language, sortHint];
  const extra = hints.filter(Boolean).join(" ");

  let searchQuery: string;
  if (ecosystem === "chrome") {
    searchQuery = `site:chromewebstore.google.com/detail ${query} ${extra} Chrome Web Store`;
  } else if (ecosystem === "shopify") {
    searchQuery = `site:apps.shopify.com ${query} ${extra} Shopify App Store`;
  } else {
    searchQuery = `site:marketplace.stripe.com/apps ${query} ${extra} Stripe App Marketplace`;
  }

  const limit = Math.min(Math.max(remaining * 3, 3), 10);
  const result = await firecrawl.search(searchQuery, {
    limit,
    integration: "prometheus",
  });
  const hits = result.web ?? result.data ?? [];
  const canonicalHits = hits.map((item: { url?: string }) => canonicalizeUrl(item.url ?? ""));
  return canonicalHits.filter((url: string) => isListingUrl(url, ecosystem));
}

/**
 * Turns a seed URL into listing URLs. A seed that is already a listing is
 * returned unchanged; any other page is scraped and the listing links found
 * in its markdown are returned.
 */
async function expandSeedUrl(url: string, ecosystem: Ecosystem): Promise<string[]> {
  if (isListingUrl(url, ecosystem)) {
    return [url];
  }
  const scraped = await scrapeMarkdown(url);
  const listings: string[] = [];
  for (const link of extractLinks(scraped.markdown)) {
    if (isListingUrl(link, ecosystem)) listings.push(link);
  }
  return listings;
}

/**
 * Builds the ordered list of listing URLs to extract, capped at `maxApps`.
 *
 * Discovery runs in three phases, each skipped once enough URLs are collected:
 *   1. seed URLs, in the order given (only for requested ecosystems);
 *   2. one search per ecosystem, each limited to an equal share of `maxApps`;
 *   3. a second search per ecosystem to fill any remaining slots.
 * Failures in any single seed or search are logged to stderr and do not abort
 * discovery.
 */
async function discoverListingUrls(): Promise<string[]> {
  const seen = new Set<string>();
  const out: string[] = [];
  // Appends a URL once, in canonical form, if it is a listing for `ecosystem`.
  function add(url: string, ecosystem: Ecosystem) {
    const canonical = canonicalizeUrl(url);
    if (!canonical || !isListingUrl(canonical, ecosystem) || seen.has(canonical)) return;
    seen.add(canonical);
    out.push(canonical);
  }

  // Phase 1: caller-supplied seeds take priority over search results.
  for (const url of seedUrls) {
    const ecosystem = ecosystemForUrl(url);
    // Seeds on unknown hosts or for ecosystems that were not requested are ignored.
    if (!ecosystem || !ecosystems.includes(ecosystem)) continue;
    try {
      for (const found of await expandSeedUrl(url, ecosystem)) add(found, ecosystem);
    } catch (error) {
      console.error(`Could not expand seed URL ${url}: ${error instanceof Error ? error.message : String(error)}`);
    }
    if (out.length >= maxApps) return out.slice(0, maxApps);
  }

  // Phase 2: give each ecosystem an equal share so one marketplace cannot fill the list.
  for (const ecosystem of ecosystems) {
    if (out.length >= maxApps) break;
    try {
      const quota = Math.max(1, Math.ceil(maxApps / ecosystems.length));
      for (const found of (await discoverFromSearch(ecosystem, quota)).slice(0, quota)) add(found, ecosystem);
    } catch (error) {
      console.error(`Search failed for ${ecosystem}: ${error instanceof Error ? error.message : String(error)}`);
    }
  }

  // Phase 3: if some ecosystems under-delivered, search again without the per-ecosystem cap.
  for (const ecosystem of ecosystems) {
    if (out.length >= maxApps) break;
    try {
      for (const found of await discoverFromSearch(ecosystem, maxApps - out.length)) add(found, ecosystem);
    } catch (error) {
      console.error(`Supplemental search failed for ${ecosystem}: ${error instanceof Error ? error.message : String(error)}`);
    }
  }

  // Phases 2 and 3 can overshoot, so trim to the requested size.
  return out.slice(0, maxApps);
}

/**
 * Creates an app row with identity and snapshot fields filled in and every
 * scraped fact set to `null` (or an empty list), ready for a parser to fill.
 *
 * The key order of this literal is the key order of each app object in the
 * JSON output, so it should not be rearranged casually.
 *
 * @param ecosystem Marketplace the listing belongs to.
 * @param listingUrl Listing URL; also used as `source_page_url`.
 * @param appName Display name, if one was found.
 */
function baseRow(ecosystem: Ecosystem, listingUrl: string, appName: string | null): AppRow {
  return {
    ecosystem,
    tracking_key: trackingKey(listingUrl, ecosystem),
    snapshot_label: snapshotLabel,
    collected_at: collectedAt,
    app_name: appName,
    app_id: appIdFromUrl(listingUrl, ecosystem),
    listing_url: listingUrl,
    developer_name: null,
    developer_url: null,
    developer_website: null,
    support_url: null,
    privacy_url: null,
    category: null,
    subcategory: null,
    // NOTE(review): the reason depends only on whether a query was supplied, so a
    // seed-provided listing is also labelled as "discovered" when both are given.
    query_match_reason: query ? `Discovered from official ${ecosystem} marketplace results for "${query}".` : "Provided as a seed URL.",
    tagline: null,
    description_short: null,
    pricing_text_raw: null,
    pricing_model_normalized: null,
    starting_price: null,
    billing_period: null,
    free_trial_available: null,
    free_plan_available: null,
    rating_value: null,
    review_count: null,
    review_count_raw_text: null,
    install_or_user_count: null,
    install_or_user_count_raw_text: null,
    popularity_rank: null,
    rank_context: null,
    last_updated_text: null,
    last_updated_date_normalized: null,
    version_text: null,
    permissions_or_scopes: null,
    screenshots_count: null,
    source_page_url: listingUrl,
    secondary_source_urls: [],
    // Default confidence; each parser overrides it once it knows what it found.
    confidence: "medium",
    notes: null,
  };
}

/**
 * Builds an app row from a scraped Chrome Web Store listing.
 *
 * All values come from pattern-matching the page markdown and metadata; fields
 * that cannot be found stay `null`. Pricing is limited to detecting the
 * "Offers in-app purchases" notice, and only when --include-pricing is on.
 *
 * @param markdown Main-content markdown of the listing page.
 * @param metadata Page metadata returned by the scraper (title, description, ...).
 * @param listingUrl Canonical listing URL.
 */
function parseChrome(markdown: string, metadata: Record<string, string>, listingUrl: string): AppRow {
  // Prefer the page's H1; fall back to the <title> without the store suffix.
  const name = firstHeading(markdown) ?? clean((metadata.title ?? "").replace(/ - Chrome Web Store$/, ""));
  const row = baseRow("chrome", listingUrl, name);
  const allLines = lines(markdown);
  row.tagline = clean(metadata.ogDescription ?? metadata.description ?? null);
  // Overview section text, without its heading, capped at 1000 characters.
  row.description_short = clean(section(markdown, "Overview", ["Details", "Privacy", "Reviews"])?.replace(/^##\s+Overview\s*/i, "").slice(0, 1000));
  // The line that carries both category links and the user count.
  const categoryLine = allLines.find((line) => line.includes("chromewebstore.google.com/category/extensions") && line.includes("users"));
  if (categoryLine) {
    const labels = [...categoryLine.matchAll(/\[([^\]]+)\]\(https:\/\/chromewebstore\.google\.com\/category\/[^)]+\)/g)].map((match) => clean(match[1]));
    row.category = labels[0] ?? null;
    row.subcategory = labels[1] ?? null;
    const usersRaw = categoryLine.match(/([0-9,.]+(?:\s*[KMB])?\s+users?)/i)?.[1] ?? null;
    row.install_or_user_count_raw_text = clean(usersRaw);
    row.install_or_user_count = parseNumber(usersRaw);
  }
  // Rating appears either as "<n> out of 5" or "Average rating <n>".
  const ratingLine = allLines.find((line) => /out of 5|Average rating/i.test(line));
  row.rating_value = ratingLine ? Number(ratingLine.match(/([0-9.]+)\s+out of 5|Average rating\s+([0-9.]+)/i)?.[1] ?? ratingLine.match(/Average rating\s+([0-9.]+)/i)?.[1] ?? "NaN") : null;
  // A rating line without a usable number yields NaN; report that as null.
  if (!Number.isFinite(row.rating_value)) row.rating_value = null;
  // "No ratings" anywhere on the page is treated as an explicit count of zero.
  const noRatings = /No ratings/i.test(markdown);
  row.review_count_raw_text = noRatings ? "No ratings" : clean(markdown.match(/([0-9,.]+(?:\s*[KMB])?)\s+ratings?/i)?.[0] ?? null);
  row.review_count = noRatings ? 0 : parseNumber(row.review_count_raw_text);
  // Detail values are read as the line following their label.
  row.version_text = lineAfter(allLines, "Version");
  row.last_updated_text = lineAfter(allLines, "Updated");
  row.last_updated_date_normalized = normalizeDate(row.last_updated_text);
  row.developer_name = lineAfter(allLines, "Offered by") ?? lineAfter(allLines, "Developer");
  row.developer_website = linkByText(markdown, "Website");
  row.privacy_url = linkByText(markdown, "privacy policy");
  row.pricing_text_raw = includePricing && /Offers in-app purchases/i.test(markdown) ? "Offers in-app purchases" : null;
  row.pricing_model_normalized = normalizePricing(row.pricing_text_raw);
  row.screenshots_count = countScreenshots(markdown);
  // Only the developer's "will not collect or use your data" statement is captured here.
  row.permissions_or_scopes = /will not collect or use your data/i.test(markdown) ? ["Developer states that the extension will not collect or use user data."] : null;
  row.confidence = row.app_name ? "high" : "medium";
  row.notes = "Chrome fields are limited to details visible on the public Chrome Web Store listing.";
  return row;
}

/**
 * Extracts visible review samples from the "Reviews" section of a Shopify
 * listing's markdown.
 *
 * The parser is line-based: a line that is exactly a date ("June 5, 2024")
 * starts a review, and the following lines are collected as its text until the
 * next date, a known country name, or a "<name> replied" marker. A developer
 * reply is looked for within the next few lines and attached to the review.
 *
 * Safeguards:
 * - A date directly after a "<name> replied" marker is the reply's own date,
 *   not a new review, so it is skipped rather than producing a bogus review
 *   made of the developer's reply text.
 * - The reply lookup stops at the next review date, so a reply is not
 *   attached to the review before the one it answers.
 * - Reviews with no text are discarded before counting toward
 *   `maxReviewsPerApp`, so the cap applies to rows actually returned.
 *
 * @param markdown Main-content markdown of the listing page.
 * @param row Parent app row; supplies tracking key, ecosystem, name and URL.
 * @returns At most `maxReviewsPerApp` reviews, in page order.
 */
function parseShopifyReviews(markdown: string, row: AppRow): ReviewRow[] {
  const reviewsSection = section(markdown, "Reviews", ["Support", "Featured in", "More apps like this"]) ?? "";
  const reviewLines = lines(reviewsSection);
  const reviews: ReviewRow[] = [];
  const countryNames = new Set(["United States", "Australia", "United Kingdom", "Canada", "India", "Germany", "France", "Spain", "Italy", "Netherlands"]);

  const isDateLine = (line: string | undefined): boolean => line !== undefined && /^[A-Z][a-z]+ \d{1,2}, \d{4}$/.test(line);
  const isReplyMarker = (line: string | undefined): boolean => line !== undefined && / replied$/i.test(line);
  const isShowMore = (line: string | undefined): boolean => line !== undefined && /^Show more$/i.test(line);

  for (let i = 0; i < reviewLines.length && reviews.length < maxReviewsPerApp; i += 1) {
    const dateText = reviewLines[i];
    if (!isDateLine(dateText)) continue;
    // The date that follows "<name> replied" belongs to the reply, not to a review.
    if (isReplyMarker(reviewLines[i - 1])) continue;

    // Gather the review body up to the next structural line.
    const textParts: string[] = [];
    let cursor = i + 1;
    while (cursor < reviewLines.length && !isDateLine(reviewLines[cursor]) && !countryNames.has(reviewLines[cursor]) && !isReplyMarker(reviewLines[cursor])) {
      if (!isShowMore(reviewLines[cursor])) textParts.push(reviewLines[cursor]);
      cursor += 1;
      // Safety valve against swallowing the rest of the page.
      if (textParts.join(" ").length > 1200) break;
    }
    const reviewText = clean(textParts.join(" "));
    if (!reviewText) continue;

    // NOTE(review): the author is only captured when the body loop stopped on a
    // line that is neither a country nor a reply marker — confirm against the
    // live page layout.
    const author = reviewLines[cursor] && !countryNames.has(reviewLines[cursor]) && !isReplyMarker(reviewLines[cursor]) ? reviewLines[cursor] : null;

    // Look a short distance ahead for a developer reply, without crossing into
    // the next review.
    let replyMarkerIndex = -1;
    for (let j = cursor; j < reviewLines.length && j < cursor + 8; j += 1) {
      if (isReplyMarker(reviewLines[j])) {
        replyMarkerIndex = j;
        break;
      }
      if (isDateLine(reviewLines[j])) break;
    }
    let replyText: string | null = null;
    let replyDate: string | null = null;
    if (replyMarkerIndex >= 0) {
      replyDate = reviewLines[replyMarkerIndex + 1] ?? null;
      const candidate = reviewLines[replyMarkerIndex + 2];
      replyText = candidate && !isShowMore(candidate) ? candidate : null;
    }

    reviews.push({
      tracking_key: row.tracking_key,
      ecosystem: row.ecosystem,
      app_name: row.app_name,
      review_source_url: `${row.listing_url}/reviews`,
      review_author: clean(author),
      review_rating: null,
      review_title: null,
      review_text: reviewText,
      review_date_text: dateText,
      review_date_normalized: normalizeDate(dateText),
      review_version: null,
      developer_reply_text: clean(replyText),
      developer_reply_date_text: clean(replyDate),
    });
  }
  return reviews;
}

/**
 * Builds an app row from a scraped Shopify App Store listing.
 *
 * Values are pattern-matched from the page markdown and metadata; anything not
 * found stays `null`. Install counts are always left `null`. Reviews are
 * attached only when --include-reviews is on, pricing only when
 * --include-pricing is on.
 *
 * @param markdown Main-content markdown of the listing page.
 * @param metadata Page metadata returned by the scraper.
 * @param listingUrl Canonical listing URL.
 */
function parseShopify(markdown: string, metadata: Record<string, string>, listingUrl: string): AppRow {
  // Prefer the page's H1; fall back to the <title> with the store suffix removed.
  const name = firstHeading(markdown) ?? clean((metadata.title ?? "").replace(/ - .*Shopify App Store.*$/i, ""));
  const row = baseRow("shopify", listingUrl, name);
  const allLines = lines(markdown);
  row.tagline = clean(metadata.ogDescription ?? metadata.description ?? null);
  // The first H2 that is not one of the known structural headings is used as the short description.
  const descriptionHeading = markdown.match(/^##\s+(?!Featured images gallery|Pricing|Reviews|Support|Featured in|More apps like this|Want to add)(.+)$/im)?.[1] ?? null;
  row.description_short = clean(descriptionHeading);
  // "Developer[Name](url)" yields both name and partner URL; otherwise use the line after "Developer".
  const developerMatch = markdown.match(/Developer\[([^\]]+)\]\(([^)]+)\)/);
  row.developer_name = clean(developerMatch?.[1] ?? lineAfter(allLines, "Developer"));
  row.developer_url = developerMatch ? canonicalizeUrl(developerMatch[2]) : null;
  row.developer_website = linkByText(markdown, "Website");
  row.privacy_url = linkByText(markdown, "Privacy policy");
  // First available of the FAQ, documentation, or tutorial links.
  row.support_url = linkByText(markdown, "FAQ") ?? linkByText(markdown, "App Documentation") ?? linkByText(markdown, "Tutorial");
  const categoryMatch = markdown.match(/Categories\s+\[([^\]]+)\]\((https:\/\/apps\.shopify\.com\/categories\/[^)]+)\)/i);
  row.category = clean(categoryMatch?.[1] ?? null);
  row.subcategory = null;
  // Rating is matched as "Rating <n> [(count)]" or, failing that, "<n> out of 5 stars ... (count)".
  const ratingMatch = markdown.match(/Rating\s*([0-9.]+)\s*\[\(([^)]+)\)\]/i) ?? markdown.match(/([0-9.]+)\s*out of 5 stars.*?\(([0-9,.K]+)\)/i);
  row.rating_value = ratingMatch ? Number(ratingMatch[1]) : null;
  // Prefer the "Reviews (n)" text; otherwise use the count captured with the rating.
  row.review_count_raw_text = clean(markdown.match(/Reviews\s+\(([0-9,.K]+)\)/i)?.[0] ?? ratingMatch?.[2] ?? null);
  row.review_count = parseNumber(row.review_count_raw_text);
  // Raw pricing text = up to three lines after the first "Pricing" + up to 1800 chars of the Pricing section.
  const pricingSummary = clean(markdown.match(/Pricing\s+([^\n]+(?:\n[^\n]+){0,2})/i)?.[1] ?? null);
  const pricingSection = section(markdown, "Pricing", ["Reviews", "Support"]) ?? "";
  row.pricing_text_raw = includePricing ? clean([pricingSummary, pricingSection.replace(/^##\s+Pricing\s*/i, "").slice(0, 1800)].filter(Boolean).join(" | ")) : null;
  row.pricing_model_normalized = normalizePricing(row.pricing_text_raw);
  const price = parsePrice(row.pricing_text_raw);
  row.starting_price = price.starting_price;
  row.billing_period = price.billing_period;
  // The trial/plan flags stay null (unknown) when no pricing text was captured.
  row.free_trial_available = row.pricing_text_raw ? /free trial/i.test(row.pricing_text_raw) : null;
  row.free_plan_available = row.pricing_text_raw ? /free plan|^free\b/i.test(row.pricing_text_raw) : null;
  row.install_or_user_count = null;
  row.install_or_user_count_raw_text = null;
  // NOTE(review): the "Launched" value is stored in the last-updated fields — confirm this is intended.
  row.last_updated_text = lineAfter(allLines, "Launched");
  row.last_updated_date_normalized = normalizeDate(row.last_updated_text);
  // Lines of the "Data access" section, skipping its heading line, capped at 29 entries.
  const permissions = section(markdown, "Data access", ["Featured in", "More apps like this"]);
  row.permissions_or_scopes = permissions ? lines(permissions).slice(1, 30) : null;
  row.screenshots_count = countScreenshots(markdown);
  row.confidence = row.app_name && row.review_count !== null ? "high" : "medium";
  row.notes = "Shopify does not usually publish install counts on app listings; install fields are null unless visible.";
  if (includeReviews) row.reviews = parseShopifyReviews(markdown, row);
  return row;
}

/**
 * Builds an app row from a scraped Stripe App Marketplace listing.
 *
 * The app name comes from page metadata (title with the "| Stripe ..." suffix
 * removed). Other values are read from the markdown relative to known labels
 * ("Built by", "Pricing", "Install app", ...). Rating, review and install
 * fields are always `null`, and `reviews` is an empty list when
 * --include-reviews is on.
 *
 * @param markdown Main-content markdown of the listing page.
 * @param metadata Page metadata returned by the scraper.
 * @param listingUrl Canonical listing URL.
 */
function parseStripe(markdown: string, metadata: Record<string, string>, listingUrl: string): AppRow {
  const titleName = clean((metadata.ogTitle ?? metadata.title ?? "").replace(/\s*\|\s*Stripe.*$/i, ""));
  const allLines = lines(markdown);
  // Position of the app name in the body, used to find the tagline beneath it.
  const nameIndex = titleName ? allLines.findIndex((line) => line === titleName) : -1;
  // The body is only searched for `titleName`, so it can never supply a different name.
  const row = baseRow("stripe", listingUrl, titleName);
  // Tagline: the line under the app name, else the metadata description after its first em dash.
  row.tagline = nameIndex >= 0 ? allLines[nameIndex + 1] ?? null : clean((metadata.ogDescription ?? metadata.description ?? "").replace(/^.*?—\s*/, ""));
  // section() returns text that starts with the markdown heading line itself
  // ("## About ..."), so the whole heading line is removed, not just the word "About".
  row.description_short = clean(section(markdown, "About", ["Permissions", "Privacy policy"])?.replace(/^#{1,3}\s+About\b[^\n]*\n?/i, "").slice(0, 1000) ?? row.tagline);
  // The category is taken from the line after the install button, unless that
  // line is really the next label.
  row.category = lineAfter(allLines, "Install app") ?? null;
  if (row.category && ["Built by", "Visible on", "Works with"].includes(row.category)) row.category = null;
  row.developer_name = lineAfter(allLines, "Built by");
  row.pricing_text_raw = includePricing ? lineAfter(allLines, "Pricing") : null;
  row.pricing_model_normalized = normalizePricing(row.pricing_text_raw);
  const price = parsePrice(row.pricing_text_raw);
  row.starting_price = price.starting_price;
  row.billing_period = price.billing_period;
  // The trial/plan flags stay null (unknown) when no pricing text was captured.
  row.free_trial_available = row.pricing_text_raw ? /free trial/i.test(row.pricing_text_raw) : null;
  row.free_plan_available = row.pricing_text_raw ? /free plan|free$/i.test(row.pricing_text_raw) : null;
  row.support_url = linkByText(markdown, "Support site");
  row.privacy_url = linkByText(markdown, "Privacy policy");
  // NOTE(review): the support site is reused as the developer website — confirm this is intended.
  row.developer_website = row.support_url;
  // Lines of the "Permissions" section, skipping its heading line, capped at 34 entries.
  const permissions = section(markdown, "Permissions", ["Privacy policy", "Terms"]);
  row.permissions_or_scopes = permissions ? lines(permissions).slice(1, 35) : null;
  row.screenshots_count = countScreenshots(markdown);
  row.review_count = null;
  row.review_count_raw_text = null;
  row.rating_value = null;
  row.confidence = row.app_name && row.developer_name ? "high" : "medium";
  row.notes = "Stripe App Marketplace listings expose fewer public metrics; rating, review, and install fields are null unless visible.";
  if (includeReviews) row.reviews = [];
  return row;
}

/**
 * Lists other listings of the same ecosystem that are linked from a scraped
 * page. Always empty unless --include-related-apps is enabled.
 */
function relatedListingUrls(markdown: string, ecosystem: Ecosystem): string[] {
  const related: string[] = [];
  if (includeRelatedApps) {
    for (const link of extractLinks(markdown)) {
      if (isListingUrl(link, ecosystem)) related.push(link);
    }
  }
  return related;
}

/**
 * Scrapes one listing URL and converts it into an app row using the parser
 * for its ecosystem.
 *
 * @returns The row plus any related listing URLs found on the page, or `null`
 *          when the URL is not a listing in one of the requested ecosystems.
 * @throws Whatever `scrapeMarkdown` throws when the page cannot be fetched.
 */
async function extractApp(url: string): Promise<{ row: AppRow; related: string[] } | null> {
  const ecosystem = ecosystemForUrl(url);
  if (!ecosystem) return null;
  if (!ecosystems.includes(ecosystem)) return null;
  if (!isListingUrl(url, ecosystem)) return null;

  const { markdown, metadata } = await scrapeMarkdown(url);

  let row: AppRow;
  switch (ecosystem) {
    case "chrome":
      row = parseChrome(markdown, metadata, url);
      break;
    case "shopify":
      row = parseShopify(markdown, metadata, url);
      break;
    default:
      row = parseStripe(markdown, metadata, url);
      break;
  }

  // Gather the auxiliary links that were found, without duplicates, in a fixed order.
  const secondary = new Set<string>();
  for (const candidate of [row.developer_url, row.developer_website, row.support_url, row.privacy_url]) {
    if (candidate) secondary.add(candidate);
  }
  row.secondary_source_urls = Array.from(secondary);

  // Keep the output shape consistent: when reviews are requested, the key is always present.
  if (includeReviews && !row.reviews) row.reviews = [];
  return { row, related: relatedListingUrls(markdown, ecosystem) };
}

/**
 * Entry point: discovers listing URLs, extracts each one in turn (optionally
 * queueing related listings while there is room), and writes the result as a
 * single JSON document to stdout.
 *
 * A listing that fails to extract is logged to stderr and skipped.
 *
 * @throws Error when not a single listing could be extracted.
 */
async function main() {
  const pending = await discoverListingUrls();
  const visited = new Set(pending);
  const collected: AppRow[] = [];

  while (pending.length > 0 && collected.length < maxApps) {
    const listingUrl = pending.shift() as string;
    try {
      const extracted = await extractApp(listingUrl);
      if (extracted === null) continue;
      collected.push(extracted.row);
      for (const candidate of extracted.related) {
        // Stop queueing once finished rows plus queued URLs already reach the cap.
        if (collected.length + pending.length >= maxApps) break;
        if (visited.has(candidate)) continue;
        visited.add(candidate);
        pending.push(candidate);
      }
    } catch (error) {
      console.error(`Skipping ${listingUrl}: ${error instanceof Error ? error.message : String(error)}`);
    }
  }

  if (collected.length === 0) {
    throw new Error("no official app listings could be extracted for the requested inputs");
  }

  // Both layouts share the same leading snapshot fields.
  const header = {
    snapshot_label: snapshotLabel,
    collected_at: collectedAt,
  };
  let payload: object;
  if (outputMode === "grouped_by_ecosystem") {
    const groups = ecosystems.map((ecosystem) => ({
      ecosystem,
      apps: collected.filter((row) => row.ecosystem === ecosystem),
    }));
    payload = { ...header, ecosystems: groups };
  } else {
    payload = { ...header, apps: collected };
  }
  process.stdout.write(JSON.stringify(payload));
}

// Run the collector; any unhandled failure is printed to stderr and turned
// into a non-zero exit status.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
deploy to unlock

Deploy this collector to unlock schedules, the API endpoint, and destinations.

One person builds it. Everyone keeps it fresh.