App Ecosystem Tracker
v1 · Published — Tracks marketplace app listings across the Chrome Web Store, Shopify App Store, and Stripe App Marketplace, capturing current listing metadata, pricing, public metrics, and visible review samples.
Output & API
Preview the latest data, download it, or call this collector as an API.
| apps | |
|---|---|
| collected_at | 2026-07-02T11:55:34.497Z |
| snapshot_label | 2026-07-02T11:55:34.497Z |
Marketplace
Publish this collector so others can deploy it — you keep ownership.
Versions
Every build and self-heal appends a version. Pin one to lock runs to it.
import Firecrawl from "@mendable/firecrawl-js";
import { parseArgs } from "node:util";
// Marketplaces this collector supports; values of the --ecosystems flag are validated against this set.
type Ecosystem = "chrome" | "shopify" | "stripe";
// Shape of the emitted JSON: a flat `apps` array, or apps grouped under each requested ecosystem.
type OutputMode = "app_rows" | "grouped_by_ecosystem";
// One visible review sample attached to an app row.
// In this file review rows are produced only by parseShopifyReviews, which always sets
// review_rating, review_title and review_version to null.
type ReviewRow = {
// Same `${ecosystem}:${id}` key as the owning AppRow.
tracking_key: string;
ecosystem: Ecosystem;
app_name: string | null;
review_source_url: string;
review_author: string | null;
review_rating: number | null;
review_title: string | null;
review_text: string | null;
// Date exactly as shown on the page, plus its YYYY-MM-DD form when it could be parsed.
review_date_text: string | null;
review_date_normalized: string | null;
review_version: string | null;
developer_reply_text: string | null;
developer_reply_date_text: string | null;
};
// One marketplace listing snapshot. baseRow() creates the row with every parsed field set to
// null and the per-ecosystem parsers fill in whatever the listing page exposes.
type AppRow = {
ecosystem: Ecosystem;
// `${ecosystem}:${app id}`, derived from the listing URL by trackingKey().
tracking_key: string;
// --snapshot-label when given, otherwise the same timestamp as collected_at.
snapshot_label: string;
collected_at: string;
app_name: string | null;
app_id: string | null;
listing_url: string;
developer_name: string | null;
developer_url: string | null;
developer_website: string | null;
support_url: string | null;
privacy_url: string | null;
category: string | null;
subcategory: string | null;
query_match_reason: string | null;
tagline: string | null;
description_short: string | null;
// Pricing fields stay null when --include-pricing is false.
pricing_text_raw: string | null;
pricing_model_normalized: string | null;
starting_price: number | null;
billing_period: string | null;
free_trial_available: boolean | null;
free_plan_available: boolean | null;
rating_value: number | null;
review_count: number | null;
review_count_raw_text: string | null;
install_or_user_count: number | null;
install_or_user_count_raw_text: string | null;
popularity_rank: number | null;
rank_context: string | null;
// Chrome fills this from the "Updated" line; Shopify fills it from the "Launched" line.
last_updated_text: string | null;
last_updated_date_normalized: string | null;
version_text: string | null;
permissions_or_scopes: string[] | null;
screenshots_count: number | null;
source_page_url: string;
// De-duplicated developer/support/privacy links collected in extractApp().
secondary_source_urls: string[];
confidence: "high" | "medium" | "low";
notes: string | null;
// Set only when --include-reviews is true.
reviews?: ReviewRow[];
};
// The Firecrawl API key comes from the environment; without it the process logs an error and exits with status 1.
const apiKey = process.env.FIRECRAWL_API_KEY;
if (!apiKey) {
console.error("FIRECRAWL_API_KEY is not set");
process.exit(1);
}
const firecrawl = new Firecrawl({ apiKey });
// Every flag is declared as a string; numeric and boolean flags are converted by the code that reads them.
// `strict: true` makes parseArgs throw on flags that are not declared below.
const { values: flags } = parseArgs({
strict: true,
options: {
ecosystems: { type: "string" },
query: { type: "string" },
"seed-urls": { type: "string" },
"max-apps": { type: "string" },
"output-mode": { type: "string" },
category: { type: "string" },
region: { type: "string" },
language: { type: "string" },
"include-reviews": { type: "string" },
"max-reviews-per-app": { type: "string" },
"include-pricing": { type: "string" },
"include-related-apps": { type: "string" },
"snapshot-label": { type: "string" },
"sort-hint": { type: "string" },
},
});
/**
 * Returns the value of a mandatory CLI flag from the parsed `flags`.
 *
 * When the flag was not supplied, prints `--<name> is required` to stderr
 * and terminates the process with exit status 1.
 */
function requiredFlag(name: string): string {
  const supplied = flags[name] as string | undefined;
  if (supplied !== undefined) {
    return supplied;
  }
  console.error(`--${name} is required`);
  return process.exit(1);
}
// ---- Run configuration: resolved and validated once at startup from the CLI flags ----

/** Reads an optional string flag, trimmed; returns `fallback` when the flag is absent or blank. */
function optionalFlag(name: string, fallback = ""): string {
  return ((flags[name] as string | undefined) ?? "").trim() || fallback;
}

/** Reads an optional boolean flag; only the text "true" (in any casing) enables it. */
function booleanFlag(name: string, fallback: boolean): boolean {
  return optionalFlag(name, String(fallback)).toLowerCase() === "true";
}

/**
 * Normalizes one --seed-urls entry. Listing URLs are fully canonicalized. Other pages on a
 * supported marketplace (search, category) keep their query string, because stripping it
 * would make the collector scrape a different page than the one requested.
 * Returns "" for entries that are not usable URLs.
 */
function normalizeSeedUrl(rawUrl: string): string {
  const canonical = canonicalizeUrl(rawUrl);
  if (!canonical) return "";
  const ecosystem = ecosystemForUrl(canonical);
  if (!ecosystem || isListingUrl(canonical, ecosystem)) return canonical;
  try {
    const parsed = new URL(rawUrl);
    parsed.hash = "";
    return parsed.toString();
  } catch {
    return canonical;
  }
}

// Single timestamp shared by every row of this run.
const collectedAt = new Date().toISOString();
const requestedEcosystems = requiredFlag("ecosystems")
  .split(",")
  .map((part) => part.trim().toLowerCase())
  .filter(Boolean);
// --query and --seed-urls are individually optional; the check further down enforces that
// at least one of them is non-empty, as its error message states.
const query = optionalFlag("query");
const seedUrlsRaw = optionalFlag("seed-urls");
const maxApps = Number(requiredFlag("max-apps"));
const outputMode = requiredFlag("output-mode") as OutputMode;
const category = optionalFlag("category");
const region = optionalFlag("region", "global");
const language = optionalFlag("language", "English");
const includeReviews = booleanFlag("include-reviews", true);
const maxReviewsPerApp = Number(optionalFlag("max-reviews-per-app", "20"));
const includePricing = booleanFlag("include-pricing", true);
const includeRelatedApps = booleanFlag("include-related-apps", false);
// The snapshot label defaults to the collection timestamp.
const snapshotLabel = optionalFlag("snapshot-label", collectedAt);
const sortHint = optionalFlag("sort-hint", "best match");

const allowedEcosystems = new Set(["chrome", "shopify", "stripe"]);
const ecosystems = requestedEcosystems.filter((ecosystem): ecosystem is Ecosystem => allowedEcosystems.has(ecosystem));
if (ecosystems.length === 0) {
  throw new Error("OUT_OF_SCOPE: at least one supported ecosystem is required: chrome, shopify, or stripe");
}
// A length mismatch means at least one requested value was not a supported ecosystem.
if (ecosystems.length !== requestedEcosystems.length) {
  throw new Error("OUT_OF_SCOPE: ecosystems must be comma-separated values from chrome, shopify, stripe");
}
if (!query && !seedUrlsRaw) {
  throw new Error("OUT_OF_SCOPE: provide at least one of --query or --seed-urls");
}
if (!Number.isFinite(maxApps) || maxApps < 1) {
  throw new Error("OUT_OF_SCOPE: --max-apps must be a positive number");
}
if (outputMode !== "app_rows" && outputMode !== "grouped_by_ecosystem") {
  throw new Error("OUT_OF_SCOPE: --output-mode must be app_rows or grouped_by_ecosystem");
}
if (!Number.isFinite(maxReviewsPerApp) || maxReviewsPerApp < 0) {
  throw new Error("OUT_OF_SCOPE: --max-reviews-per-app must be zero or a positive number");
}
const seedUrls = seedUrlsRaw
  .split(",")
  .map((entry) => normalizeSeedUrl(entry.trim()))
  .filter(Boolean);
/**
 * Normalizes a scraped text fragment.
 *
 * Unescapes markdown-escaped asterisks, collapses whitespace runs to a single
 * space, removes whitespace before `, . ! ? ; :` and trims the result.
 * Returns null for missing input or when nothing is left after normalization.
 */
function clean(value: string | null | undefined): string | null {
  if (!value) {
    return null;
  }
  const unescaped = value.replace(/\\\*/g, "*");
  const collapsed = unescaped.replace(/\s+/g, " ");
  const tightened = collapsed.replace(/\s+([,.!?;:])/g, "$1");
  const result = tightened.trim();
  return result === "" ? null : result;
}
/**
 * Splits markdown into cleaned, non-empty lines.
 * A leading list bullet (`- ` or `* `) is removed from each line before cleaning.
 */
function lines(markdown: string): string[] {
  const result: string[] = [];
  for (const rawLine of markdown.split("\n")) {
    const text = clean(rawLine.replace(/^[-*]\s+/, ""));
    if (text) {
      result.push(text);
    }
  }
  return result;
}
/**
 * Canonicalizes an absolute web URL for de-duplication.
 *
 * Drops the fragment, the query string and trailing slashes from the path.
 * Protocol-relative input (`//host/path`) is treated as https.
 *
 * @param url Candidate URL text.
 * @returns The canonical URL, or "" when the input is empty, relative, unparsable,
 *          or uses a scheme other than http/https (e.g. `mailto:`, `javascript:`).
 */
function canonicalizeUrl(url: string): string {
  if (!url) return "";
  const candidate = url.trim();
  if (!candidate) return "";
  let parsed: URL;
  try {
    // Parse without a placeholder base so relative input is rejected instead of being
    // resolved onto an unrelated host.
    parsed = new URL(candidate.startsWith("//") ? `https:${candidate}` : candidate);
  } catch {
    return "";
  }
  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") return "";
  parsed.hash = "";
  parsed.search = "";
  parsed.pathname = parsed.pathname.replace(/\/+$/, "");
  return parsed.toString();
}
/** Returns the hostname of `url` without a leading `www.`, or "" when `url` cannot be parsed. */
function urlHost(url: string): string {
  let hostname: string;
  try {
    hostname = new URL(url).hostname;
  } catch {
    return "";
  }
  return hostname.replace(/^www\./, "");
}
/** Returns the path component of `url`, or "" when `url` cannot be parsed. */
function urlPath(url: string): string {
  let pathname = "";
  try {
    pathname = new URL(url).pathname;
  } catch {
    pathname = "";
  }
  return pathname;
}
/** Maps a URL to the marketplace that serves its host, or null for any other host. */
function ecosystemForUrl(url: string): Ecosystem | null {
  switch (urlHost(url)) {
    case "chromewebstore.google.com":
      return "chrome";
    case "apps.shopify.com":
      return "shopify";
    case "marketplace.stripe.com":
      return "stripe";
    default:
      return null;
  }
}
/**
 * Tells whether `url` is an individual app listing page of `ecosystem`.
 *
 * The URL must be on that marketplace's own host, so external links found on a scraped
 * page are never mistaken for listings.
 * - chrome: path ends in `/detail/<slug>/<32-character id of letters a-p>`.
 * - shopify: path is exactly one segment that is not a reserved section name.
 *   Sub-pages such as `/<app>/reviews` are not listings.
 * - stripe: path is exactly `/apps/<slug>`.
 *
 * @returns false for unparsable URLs.
 */
function isListingUrl(url: string, ecosystem: Ecosystem): boolean {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return false;
  }
  const host = parsed.hostname.replace(/^www\./, "");
  const path = parsed.pathname;
  if (ecosystem === "chrome") {
    return host === "chromewebstore.google.com" && /\/detail\/[^/]+\/[a-p]{32}$/i.test(path);
  }
  if (ecosystem === "shopify") {
    if (host !== "apps.shopify.com") return false;
    const segments = path.split("/").filter(Boolean);
    if (segments.length !== 1) return false;
    return !["categories", "partners", "stories", "search", "login", "extensions"].includes(segments[0]);
  }
  return ecosystem === "stripe" && host === "marketplace.stripe.com" && /^\/apps\/[^/]+$/i.test(path);
}
/**
 * Extracts the marketplace-specific app identifier from a listing URL:
 * the last path segment for chrome, the first for shopify, the second for stripe.
 * Returns null when that segment does not exist.
 */
function appIdFromUrl(url: string, ecosystem: Ecosystem): string | null {
  const segments = urlPath(url).split("/").filter(Boolean);
  switch (ecosystem) {
    case "chrome":
      return segments[segments.length - 1] ?? null;
    case "shopify":
      return segments[0] ?? null;
    case "stripe":
      return segments[1] ?? null;
    default:
      return null;
  }
}
/**
 * Builds the `${ecosystem}:${id}` key that identifies an app across snapshots.
 * Falls back to a lowercased, dash-separated form of the URL path when no app id can be extracted.
 */
function trackingKey(url: string, ecosystem: Ecosystem): string {
  const appId = appIdFromUrl(url, ecosystem);
  const suffix = appId ?? urlPath(url).replace(/[^a-z0-9]+/gi, "-").toLowerCase();
  return `${ecosystem}:${suffix}`;
}
/**
 * Parses the first number found in a display string such as "1,234 users" or "2.5K ratings".
 *
 * Thousands separators are ignored. A K/M/B suffix scales the value, but only when it
 * stands alone; the first letter of a following word ("300 members") is not a suffix.
 *
 * @returns The rounded integer value, or null when the text is empty or has no digits.
 */
function parseNumber(value: string | null): number | null {
  if (!value) return null;
  // `\b` after the suffix rejects letters that merely begin the next word.
  const match = value.replace(/,/g, "").match(/(\d+(?:\.\d+)?)(?:\s*([KMB])\b)?/i);
  if (!match) return null;
  const multipliers: Record<string, number> = { K: 1_000, M: 1_000_000, B: 1_000_000_000 };
  const multiplier = match[2] ? multipliers[match[2].toUpperCase()] ?? 1 : 1;
  return Math.round(Number(match[1]) * multiplier);
}
/**
 * Extracts the first dollar amount and the billing period from pricing text.
 *
 * Amounts with thousands separators ("$1,299.50") are read in full. The billing period is
 * recognized after "/" or "per" (month, year, user, seat, transaction) and is lowercased.
 *
 * @returns Both fields, each null when the text does not contain it.
 */
function parsePrice(text: string | null): { starting_price: number | null; billing_period: string | null } {
  if (!text) return { starting_price: null, billing_period: null };
  // Group 1: integer part, either comma-grouped or plain digits. Group 2: optional decimals.
  const price = text.match(/\$(\d{1,3}(?:,\d{3})+|\d+)(\.\d+)?/);
  const period = text.match(/\/\s*(month|year|user|seat|transaction)|per\s+(month|year|user|seat|transaction)/i);
  const periodText = period ? period[1] ?? period[2] : undefined;
  return {
    starting_price: price ? Number(`${price[1].replace(/,/g, "")}${price[2] ?? ""}`) : null,
    billing_period: periodText ? periodText.toLowerCase() : null,
  };
}
/**
 * Classifies raw pricing text into a coarse pricing model label.
 *
 * Rules are checked in order and the first match wins; the check is case-insensitive.
 * Returns null for missing text or when no rule applies.
 */
function normalizePricing(text: string | null): string | null {
  if (!text) {
    return null;
  }
  const lowered = text.toLowerCase();
  const mentions = (needle: string): boolean => lowered.includes(needle);
  const hasDollarAmount = mentions("$");

  if (mentions("free plan")) {
    return hasDollarAmount ? "freemium" : "free_plan_available";
  }
  if (mentions("free to install")) {
    return "free_to_install";
  }
  if (mentions("free") && !hasDollarAmount) {
    return "free";
  }
  if (mentions("paid subscription")) {
    return "paid_subscription";
  }
  if (mentions("in-app purchase")) {
    return "in_app_purchases";
  }
  if (hasDollarAmount) {
    return "paid";
  }
  return mentions("contact") ? "contact_sales" : null;
}
/**
 * Converts a displayed date ("June 5, 2024", "2024-06-05") to `YYYY-MM-DD`.
 *
 * The calendar date is taken as written. ISO-style text keeps its own date part, and any
 * other parsable text is read back in local time — the same frame Date.parse uses for
 * strings without a time zone — so the result does not shift by a day in time zones
 * ahead of UTC.
 *
 * @returns The normalized date, or null when the text is empty or not parsable.
 */
function normalizeDate(text: string | null): string | null {
  if (!text) return null;
  const parsed = Date.parse(text);
  if (!Number.isFinite(parsed)) return null;
  const isoPrefix = text.trim().match(/^(\d{4})-(\d{2})-(\d{2})(?!\d)/);
  if (isoPrefix) return `${isoPrefix[1]}-${isoPrefix[2]}-${isoPrefix[3]}`;
  const date = new Date(parsed);
  const pad = (part: number, width: number): string => String(part).padStart(width, "0");
  return `${pad(date.getFullYear(), 4)}-${pad(date.getMonth() + 1, 2)}-${pad(date.getDate(), 2)}`;
}
/** Returns the cleaned text of the first level-1 (`# `) markdown heading, or null when there is none. */
function firstHeading(markdown: string): string | null {
  const heading = /^#\s+(.+)$/m.exec(markdown);
  return clean(heading ? heading[1] : null);
}
/**
 * Returns the markdown section that starts at the first level 1-3 heading beginning with
 * `heading` (case-insensitive) and runs until the nearest following heading that begins
 * with one of `stopHeadings`, or to the end of the document.
 *
 * Stop headings are searched only after the section's own heading line, so a stop heading
 * that equals or prefixes the start heading cannot truncate the section to nothing.
 *
 * @returns The trimmed section including its heading line, or null when the heading is absent.
 */
function section(markdown: string, heading: string, stopHeadings: string[]): string | null {
  const headingPattern = (title: string): RegExp => {
    const escaped = title.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    return new RegExp(`^#{1,3}\\s+${escaped}\\b.*$`, "im");
  };
  const start = markdown.search(headingPattern(heading));
  if (start < 0) return null;
  const rest = markdown.slice(start);
  const headingLineEnd = rest.indexOf("\n");
  // A heading on the last line has no body to search for stop headings.
  if (headingLineEnd < 0) return rest.trim();
  const bodyOffset = headingLineEnd + 1;
  const body = rest.slice(bodyOffset);
  let end = rest.length;
  for (const stop of stopHeadings) {
    const found = body.search(headingPattern(stop));
    if (found >= 0) end = Math.min(end, bodyOffset + found);
  }
  return rest.slice(0, end).trim();
}
/**
 * Finds the first line equal to `label` (case-insensitive) and returns the line that follows it.
 * Returns null when the label is not found or is the last line.
 */
function lineAfter(markdownLines: string[], label: string): string | null {
  const wanted = label.toLowerCase();
  for (let position = 0; position < markdownLines.length; position += 1) {
    if (markdownLines[position].toLowerCase() === wanted) {
      return markdownLines[position + 1] ?? null;
    }
  }
  return null;
}
/**
 * Returns the canonical target of the first markdown link whose label equals `text`
 * (case-insensitive).
 *
 * @returns null when no such link exists or when its target is not an acceptable URL, so
 *          callers can chain alternatives with `??`.
 */
function linkByText(markdown: string, text: string): string | null {
  const escapedLabel = text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const pattern = new RegExp(`\\[${escapedLabel}\\]\\(([^)]+)\\)`, "i");
  const target = markdown.match(pattern)?.[1];
  if (!target) return null;
  // canonicalizeUrl signals rejection with ""; expose that as null rather than as a URL.
  return canonicalizeUrl(target) || null;
}
/**
 * Collects the canonical targets of all absolute http(s) markdown links, in document order.
 * Duplicates are kept; targets rejected by canonicalizeUrl are skipped.
 */
function extractLinks(markdown: string): string[] {
  const found: string[] = [];
  for (const hit of markdown.matchAll(/\[[^\]]+\]\((https?:\/\/[^)]+)\)/g)) {
    const normalized = canonicalizeUrl(hit[1]);
    if (normalized) {
      found.push(normalized);
    }
  }
  return found;
}
/**
 * Counts markdown images whose alt text mentions "screenshot", "media" or "feature"
 * (case-insensitive). Returns null instead of 0 when none are found.
 */
function countScreenshots(markdown: string): number | null {
  const imagePattern = /!\[[^\]]*(screenshot|media|feature)[^\]]*\]/gi;
  let total = 0;
  while (imagePattern.exec(markdown) !== null) {
    total += 1;
  }
  return total > 0 ? total : null;
}
/**
 * Scrapes `url` through Firecrawl and returns its main-content markdown with page metadata.
 *
 * Makes up to two attempts; an attempt counts as failed when the call throws or the returned
 * markdown is blank. Both the top-level and the `data`-wrapped response shapes are accepted.
 *
 * @throws Error `listing scrape failed for <url>: <reason>` when both attempts fail.
 */
async function scrapeMarkdown(url: string): Promise<{ markdown: string; metadata: Record<string, string> }> {
  const maxAttempts = 2;
  let failure: unknown = null;
  let attemptsMade = 0;
  while (attemptsMade < maxAttempts) {
    attemptsMade += 1;
    try {
      const doc = await firecrawl.scrape(url, {
        formats: ["markdown"],
        onlyMainContent: true,
        integration: "prometheus",
      });
      const markdown = (doc.markdown ?? doc.data?.markdown ?? "") as string;
      const metadata = (doc.metadata ?? doc.data?.metadata ?? {}) as Record<string, string>;
      if (markdown.trim()) {
        return { markdown, metadata };
      }
      failure = new Error("empty markdown");
    } catch (error) {
      failure = error;
    }
  }
  const reason = failure instanceof Error ? failure.message : String(failure);
  throw new Error(`listing scrape failed for ${url}: ${reason}`);
}
/**
 * Searches the web, restricted to one marketplace's site, for listings matching the run's query.
 *
 * The query is extended with the category, the region (unless "global"), the language and the
 * sort hint. The result limit is three times `remaining`, clamped to the range 3-10.
 *
 * @returns Canonical listing URLs for `ecosystem`, or an empty list when no query was given.
 */
async function discoverFromSearch(ecosystem: Ecosystem, remaining: number): Promise<string[]> {
  if (!query) {
    return [];
  }
  const regionTerm = region !== "global" ? region : "";
  const qualifiers = [category, regionTerm, language, sortHint].filter(Boolean).join(" ");

  let searchQuery: string;
  if (ecosystem === "chrome") {
    searchQuery = `site:chromewebstore.google.com/detail ${query} ${qualifiers} Chrome Web Store`;
  } else if (ecosystem === "shopify") {
    searchQuery = `site:apps.shopify.com ${query} ${qualifiers} Shopify App Store`;
  } else {
    searchQuery = `site:marketplace.stripe.com/apps ${query} ${qualifiers} Stripe App Marketplace`;
  }

  const limit = Math.min(Math.max(remaining * 3, 3), 10);
  const result = await firecrawl.search(searchQuery, { limit, integration: "prometheus" });
  // Accept either response shape: results under `web` or under `data`.
  const hits = result.web ?? result.data ?? [];
  return hits
    .map((item: { url?: string }) => canonicalizeUrl(item.url ?? ""))
    .filter((url: string) => isListingUrl(url, ecosystem));
}
/**
 * Turns a seed URL into listing URLs: a listing URL is returned as-is, any other page is
 * scraped and the listing links found on it are returned.
 */
async function expandSeedUrl(url: string, ecosystem: Ecosystem): Promise<string[]> {
  if (!isListingUrl(url, ecosystem)) {
    const page = await scrapeMarkdown(url);
    return extractLinks(page.markdown).filter((candidate) => isListingUrl(candidate, ecosystem));
  }
  return [url];
}
// Builds the ordered, de-duplicated list of listing URLs to scrape, capped at maxApps.
// Seed URLs are processed first, followed by two rounds of search per requested ecosystem.
// Failures while expanding a seed or searching are logged to stderr and skipped.
async function discoverListingUrls(): Promise<string[]> {
const seen = new Set<string>();
const out: string[] = [];
// Canonicalizes a candidate and appends it only when it is a listing URL not seen before.
function add(url: string, ecosystem: Ecosystem) {
const canonical = canonicalizeUrl(url);
if (!canonical || !isListingUrl(canonical, ecosystem) || seen.has(canonical)) return;
seen.add(canonical);
out.push(canonical);
}
// 1) Seed URLs; seeds on unsupported hosts or for ecosystems that were not requested are ignored.
for (const url of seedUrls) {
const ecosystem = ecosystemForUrl(url);
if (!ecosystem || !ecosystems.includes(ecosystem)) continue;
try {
for (const found of await expandSeedUrl(url, ecosystem)) add(found, ecosystem);
} catch (error) {
console.error(`Could not expand seed URL ${url}: ${error instanceof Error ? error.message : String(error)}`);
}
// Seeds alone may already satisfy the cap; in that case no search is performed.
if (out.length >= maxApps) return out.slice(0, maxApps);
}
// 2) First search round: each ecosystem contributes at most an equal share of maxApps.
for (const ecosystem of ecosystems) {
if (out.length >= maxApps) break;
try {
const quota = Math.max(1, Math.ceil(maxApps / ecosystems.length));
for (const found of (await discoverFromSearch(ecosystem, quota)).slice(0, quota)) add(found, ecosystem);
} catch (error) {
console.error(`Search failed for ${ecosystem}: ${error instanceof Error ? error.message : String(error)}`);
}
}
// 3) Supplemental round: while still short of maxApps, search again sized by the remaining slots.
for (const ecosystem of ecosystems) {
if (out.length >= maxApps) break;
try {
for (const found of await discoverFromSearch(ecosystem, maxApps - out.length)) add(found, ecosystem);
} catch (error) {
console.error(`Supplemental search failed for ${ecosystem}: ${error instanceof Error ? error.message : String(error)}`);
}
}
return out.slice(0, maxApps);
}
// Creates an AppRow with the identity fields filled in and every parsed field set to null
// (confidence starts at "medium"); the per-ecosystem parsers overwrite what they can extract.
// The property order below is the key order of each app object in the JSON written by main().
function baseRow(ecosystem: Ecosystem, listingUrl: string, appName: string | null): AppRow {
return {
ecosystem,
tracking_key: trackingKey(listingUrl, ecosystem),
snapshot_label: snapshotLabel,
collected_at: collectedAt,
app_name: appName,
app_id: appIdFromUrl(listingUrl, ecosystem),
listing_url: listingUrl,
developer_name: null,
developer_url: null,
developer_website: null,
support_url: null,
privacy_url: null,
category: null,
subcategory: null,
// NOTE(review): whenever a query was given, seed-derived rows also get the "Discovered from ..." reason.
query_match_reason: query ? `Discovered from official ${ecosystem} marketplace results for "${query}".` : "Provided as a seed URL.",
tagline: null,
description_short: null,
pricing_text_raw: null,
pricing_model_normalized: null,
starting_price: null,
billing_period: null,
free_trial_available: null,
free_plan_available: null,
rating_value: null,
review_count: null,
review_count_raw_text: null,
install_or_user_count: null,
install_or_user_count_raw_text: null,
popularity_rank: null,
rank_context: null,
last_updated_text: null,
last_updated_date_normalized: null,
version_text: null,
permissions_or_scopes: null,
screenshots_count: null,
source_page_url: listingUrl,
secondary_source_urls: [],
confidence: "medium",
notes: null,
};
}
/**
 * Builds an AppRow from the scraped markdown and metadata of a Chrome Web Store listing.
 *
 * The name comes from the first H1, falling back to the page title without its
 * " - Chrome Web Store" suffix. Confidence is "high" whenever a name was found.
 */
function parseChrome(markdown: string, metadata: Record<string, string>, listingUrl: string): AppRow {
  const appName = firstHeading(markdown) ?? clean((metadata.title ?? "").replace(/ - Chrome Web Store$/, ""));
  const row = baseRow("chrome", listingUrl, appName);
  const allLines = lines(markdown);

  // Summary text.
  row.tagline = clean(metadata.ogDescription ?? metadata.description ?? null);
  const overview = section(markdown, "Overview", ["Details", "Privacy", "Reviews"]);
  row.description_short = clean(overview?.replace(/^##\s+Overview\s*/i, "").slice(0, 1000));

  // Category links and the user count are read from the same line.
  const categoryLine = allLines.find(
    (line) => line.includes("chromewebstore.google.com/category/extensions") && line.includes("users"),
  );
  if (categoryLine) {
    const categoryLinks = categoryLine.matchAll(/\[([^\]]+)\]\(https:\/\/chromewebstore\.google\.com\/category\/[^)]+\)/g);
    const [primary, secondary] = Array.from(categoryLinks, (hit) => clean(hit[1]));
    row.category = primary ?? null;
    row.subcategory = secondary ?? null;
    const usersRaw = /([0-9,.]+(?:\s*[KMB])?\s+users?)/i.exec(categoryLine)?.[1] ?? null;
    row.install_or_user_count_raw_text = clean(usersRaw);
    row.install_or_user_count = parseNumber(usersRaw);
  }

  // Rating: "<n> out of 5" or "Average rating <n>"; anything non-numeric becomes null.
  const ratingLine = allLines.find((line) => /out of 5|Average rating/i.test(line));
  let rating: number | null = null;
  if (ratingLine) {
    const outOfFive = ratingLine.match(/([0-9.]+)\s+out of 5|Average rating\s+([0-9.]+)/i)?.[1];
    const ratingText = outOfFive ?? ratingLine.match(/Average rating\s+([0-9.]+)/i)?.[1] ?? "NaN";
    rating = Number(ratingText);
  }
  row.rating_value = Number.isFinite(rating) ? rating : null;

  // Rating count: an explicit "No ratings" marker anywhere on the page means zero.
  if (/No ratings/i.test(markdown)) {
    row.review_count_raw_text = "No ratings";
    row.review_count = 0;
  } else {
    row.review_count_raw_text = clean(markdown.match(/([0-9,.]+(?:\s*[KMB])?)\s+ratings?/i)?.[0] ?? null);
    row.review_count = parseNumber(row.review_count_raw_text);
  }

  // Label/value pairs from the details block.
  row.version_text = lineAfter(allLines, "Version");
  row.last_updated_text = lineAfter(allLines, "Updated");
  row.last_updated_date_normalized = normalizeDate(row.last_updated_text);
  row.developer_name = lineAfter(allLines, "Offered by") ?? lineAfter(allLines, "Developer");
  row.developer_website = linkByText(markdown, "Website");
  row.privacy_url = linkByText(markdown, "privacy policy");

  // The only pricing signal read from the listing is the in-app purchase notice.
  const offersPurchases = includePricing && /Offers in-app purchases/i.test(markdown);
  row.pricing_text_raw = offersPurchases ? "Offers in-app purchases" : null;
  row.pricing_model_normalized = normalizePricing(row.pricing_text_raw);

  row.screenshots_count = countScreenshots(markdown);
  const declaresNoDataUse = /will not collect or use your data/i.test(markdown);
  row.permissions_or_scopes = declaresNoDataUse
    ? ["Developer states that the extension will not collect or use user data."]
    : null;
  row.confidence = row.app_name ? "high" : "medium";
  row.notes = "Chrome fields are limited to details visible on the public Chrome Web Store listing.";
  return row;
}
/**
 * Extracts up to `maxReviewsPerApp` review samples from the "Reviews" section of a Shopify
 * listing.
 *
 * A review starts at a line that is exactly a date ("March 5, 2024"). The following lines are
 * its text, until the next date line, a known country name, or an "<x> replied" marker.
 * A reply marker found within the next 8 lines — and before the next review's date — attaches
 * the developer reply: the line after the marker is kept as the reply date and the line after
 * that as the reply text.
 *
 * Lines consumed as a developer reply are not scanned again, so a reply's date line can never
 * start a phantom review. Reviews without text are skipped and do not count toward the limit.
 */
function parseShopifyReviews(markdown: string, row: AppRow): ReviewRow[] {
  const reviewsSection = section(markdown, "Reviews", ["Support", "Featured in", "More apps like this"]) ?? "";
  const reviewLines = lines(reviewsSection);
  const reviews: ReviewRow[] = [];
  const countryNames = new Set(["United States", "Australia", "United Kingdom", "Canada", "India", "Germany", "France", "Spain", "Italy", "Netherlands"]);
  const datePattern = /^[A-Z][a-z]+ \d{1,2}, \d{4}$/;
  const isDateLine = (line: string | undefined): boolean => line !== undefined && datePattern.test(line);
  const isReplyMarker = (line: string | undefined): boolean => line !== undefined && / replied$/i.test(line);
  const isShowMore = (line: string): boolean => /^Show more$/i.test(line);
  const replyLookahead = 8;

  let index = 0;
  while (index < reviewLines.length && reviews.length < maxReviewsPerApp) {
    const dateText = reviewLines[index];
    if (!isDateLine(dateText)) {
      index += 1;
      continue;
    }

    // Collect the review body, capped at roughly 1200 characters.
    const textParts: string[] = [];
    let cursor = index + 1;
    while (
      cursor < reviewLines.length &&
      !isDateLine(reviewLines[cursor]) &&
      !countryNames.has(reviewLines[cursor]) &&
      !isReplyMarker(reviewLines[cursor])
    ) {
      if (!isShowMore(reviewLines[cursor])) textParts.push(reviewLines[cursor]);
      cursor += 1;
      if (textParts.join(" ").length > 1200) break;
    }

    // The line where collection stopped is used as the author unless it is a structural
    // marker. A date line there belongs to the next review and is never an author.
    // NOTE(review): with these stop conditions the author is only non-null when collection
    // stopped at the length cap — confirm where the reviewer name sits in the page layout.
    const stopLine = reviewLines[cursor];
    const author =
      stopLine && !isDateLine(stopLine) && !countryNames.has(stopLine) && !isReplyMarker(stopLine) ? stopLine : null;

    // Look for a developer reply that belongs to this review.
    let replyText: string | null = null;
    let replyDate: string | null = null;
    let resumeAt = cursor;
    const scanEnd = Math.min(reviewLines.length, cursor + replyLookahead);
    for (let probe = cursor; probe < scanEnd; probe += 1) {
      if (isDateLine(reviewLines[probe])) break; // the next review starts here: no reply for this one
      if (!isReplyMarker(reviewLines[probe])) continue;
      replyDate = reviewLines[probe + 1] ?? null;
      const replyBody = reviewLines[probe + 2];
      replyText = replyBody && !isShowMore(replyBody) && !isDateLine(replyBody) ? replyBody : null;
      // Skip what was consumed: the marker, the reply date line and, when present, the reply text.
      resumeAt = probe + 1;
      if (isDateLine(reviewLines[resumeAt])) resumeAt += 1;
      if (replyText !== null) resumeAt = probe + 3;
      break;
    }

    const reviewText = clean(textParts.join(" "));
    if (reviewText) {
      reviews.push({
        tracking_key: row.tracking_key,
        ecosystem: row.ecosystem,
        app_name: row.app_name,
        review_source_url: `${row.listing_url}/reviews`,
        review_author: clean(author),
        review_rating: null,
        review_title: null,
        review_text: reviewText,
        review_date_text: dateText,
        review_date_normalized: normalizeDate(dateText),
        review_version: null,
        developer_reply_text: clean(replyText),
        developer_reply_date_text: clean(replyDate),
      });
    }
    // cursor is always past `index`, so the scan makes progress on every pass.
    index = Math.max(cursor, resumeAt);
  }
  return reviews;
}
/**
 * Builds an AppRow from the scraped markdown and metadata of a Shopify App Store listing.
 *
 * The name comes from the first H1, falling back to the page title without its
 * "... Shopify App Store" suffix. Confidence is "high" when both a name and a review count
 * were found. Review samples are attached when --include-reviews is enabled.
 */
function parseShopify(markdown: string, metadata: Record<string, string>, listingUrl: string): AppRow {
  const appName = firstHeading(markdown) ?? clean((metadata.title ?? "").replace(/ - .*Shopify App Store.*$/i, ""));
  const row = baseRow("shopify", listingUrl, appName);
  const allLines = lines(markdown);

  // Summary: the first H2 that is not one of the page's structural sections is used as the description.
  row.tagline = clean(metadata.ogDescription ?? metadata.description ?? null);
  const descriptionHeading = /^##\s+(?!Featured images gallery|Pricing|Reviews|Support|Featured in|More apps like this|Want to add)(.+)$/im.exec(markdown);
  row.description_short = clean(descriptionHeading?.[1] ?? null);

  // Developer and support links.
  const developerLink = /Developer\[([^\]]+)\]\(([^)]+)\)/.exec(markdown);
  row.developer_name = clean(developerLink?.[1] ?? lineAfter(allLines, "Developer"));
  row.developer_url = developerLink ? canonicalizeUrl(developerLink[2]) : null;
  row.developer_website = linkByText(markdown, "Website");
  row.privacy_url = linkByText(markdown, "Privacy policy");
  row.support_url =
    linkByText(markdown, "FAQ") ?? linkByText(markdown, "App Documentation") ?? linkByText(markdown, "Tutorial");

  // Category.
  const categoryLink = /Categories\s+\[([^\]]+)\]\((https:\/\/apps\.shopify\.com\/categories\/[^)]+)\)/i.exec(markdown);
  row.category = clean(categoryLink?.[1] ?? null);
  row.subcategory = null;

  // Rating and review count.
  const ratingMatch =
    markdown.match(/Rating\s*([0-9.]+)\s*\[\(([^)]+)\)\]/i) ?? markdown.match(/([0-9.]+)\s*out of 5 stars.*?\(([0-9,.K]+)\)/i);
  row.rating_value = ratingMatch ? Number(ratingMatch[1]) : null;
  const reviewsHeadingCount = markdown.match(/Reviews\s+\(([0-9,.K]+)\)/i)?.[0];
  row.review_count_raw_text = clean(reviewsHeadingCount ?? ratingMatch?.[2] ?? null);
  row.review_count = parseNumber(row.review_count_raw_text);

  // Pricing: the short summary joined with the start of the Pricing section by " | ".
  let pricingText: string | null = null;
  if (includePricing) {
    const summary = clean(/Pricing\s+([^\n]+(?:\n[^\n]+){0,2})/i.exec(markdown)?.[1] ?? null);
    const details = (section(markdown, "Pricing", ["Reviews", "Support"]) ?? "")
      .replace(/^##\s+Pricing\s*/i, "")
      .slice(0, 1800);
    pricingText = clean([summary, details].filter(Boolean).join(" | "));
  }
  row.pricing_text_raw = pricingText;
  row.pricing_model_normalized = normalizePricing(pricingText);
  const { starting_price: startingPrice, billing_period: billingPeriod } = parsePrice(pricingText);
  row.starting_price = startingPrice;
  row.billing_period = billingPeriod;
  row.free_trial_available = pricingText ? /free trial/i.test(pricingText) : null;
  row.free_plan_available = pricingText ? /free plan|^free\b/i.test(pricingText) : null;

  // Install counts are not read from Shopify listings.
  row.install_or_user_count = null;
  row.install_or_user_count_raw_text = null;

  // The "Launched" line is stored in the last-updated fields.
  row.last_updated_text = lineAfter(allLines, "Launched");
  row.last_updated_date_normalized = normalizeDate(row.last_updated_text);

  // Data access scopes: section lines after the heading, at most 29 of them.
  const dataAccess = section(markdown, "Data access", ["Featured in", "More apps like this"]);
  row.permissions_or_scopes = dataAccess ? lines(dataAccess).slice(1, 30) : null;

  row.screenshots_count = countScreenshots(markdown);
  row.confidence = row.app_name && row.review_count !== null ? "high" : "medium";
  row.notes = "Shopify does not usually publish install counts on app listings; install fields are null unless visible.";
  if (includeReviews) {
    row.reviews = parseShopifyReviews(markdown, row);
  }
  return row;
}
/**
 * Builds an AppRow from the scraped markdown and metadata of a Stripe App Marketplace listing.
 *
 * The name comes from the page title without its "| Stripe ..." suffix, falling back to the
 * first H1 when the metadata carries no usable title. Rating, review and install fields are
 * always null. Confidence is "high" when both a name and a developer name were found.
 */
function parseStripe(markdown: string, metadata: Record<string, string>, listingUrl: string): AppRow {
  const titleName = clean((metadata.ogTitle ?? metadata.title ?? "").replace(/\s*\|\s*Stripe.*$/i, ""));
  const allLines = lines(markdown);
  const nameIndex = titleName ? allLines.findIndex((line) => line === titleName) : -1;
  const row = baseRow("stripe", listingUrl, titleName ?? firstHeading(markdown));

  // When the name appears as a line of its own, the line right after it is used as the tagline.
  row.tagline =
    nameIndex >= 0
      ? allLines[nameIndex + 1] ?? null
      : clean((metadata.ogDescription ?? metadata.description ?? "").replace(/^.*?—\s*/, ""));

  // section() returns the text including its "#"-prefixed heading line, so the heading is
  // stripped together with its markers; an empty section falls back to the tagline.
  const about = section(markdown, "About", ["Permissions", "Privacy policy"]);
  row.description_short = clean(about?.replace(/^#{1,3}\s+About\s*/i, "").slice(0, 1000)) ?? row.tagline;

  // The line after "Install app" is used as the category, unless it is the next label.
  row.category = lineAfter(allLines, "Install app") ?? null;
  if (row.category && ["Built by", "Visible on", "Works with"].includes(row.category)) row.category = null;
  row.developer_name = lineAfter(allLines, "Built by");

  row.pricing_text_raw = includePricing ? lineAfter(allLines, "Pricing") : null;
  row.pricing_model_normalized = normalizePricing(row.pricing_text_raw);
  const price = parsePrice(row.pricing_text_raw);
  row.starting_price = price.starting_price;
  row.billing_period = price.billing_period;
  row.free_trial_available = row.pricing_text_raw ? /free trial/i.test(row.pricing_text_raw) : null;
  row.free_plan_available = row.pricing_text_raw ? /free plan|free$/i.test(row.pricing_text_raw) : null;

  row.support_url = linkByText(markdown, "Support site");
  row.privacy_url = linkByText(markdown, "Privacy policy");
  // The support site is also recorded as the developer website.
  row.developer_website = row.support_url;

  // Permission lines after the section heading, at most 34 of them.
  const permissions = section(markdown, "Permissions", ["Privacy policy", "Terms"]);
  row.permissions_or_scopes = permissions ? lines(permissions).slice(1, 35) : null;
  row.screenshots_count = countScreenshots(markdown);
  row.review_count = null;
  row.review_count_raw_text = null;
  row.rating_value = null;
  row.confidence = row.app_name && row.developer_name ? "high" : "medium";
  row.notes = "Stripe App Marketplace listings expose fewer public metrics; rating, review, and install fields are null unless visible.";
  if (includeReviews) row.reviews = [];
  return row;
}
/**
 * Returns the listing URLs of `ecosystem` linked from a listing page, or an empty list when
 * --include-related-apps is disabled.
 */
function relatedListingUrls(markdown: string, ecosystem: Ecosystem): string[] {
  return includeRelatedApps
    ? extractLinks(markdown).filter((candidate) => isListingUrl(candidate, ecosystem))
    : [];
}
/**
 * Scrapes one listing URL and parses it with the parser of its marketplace.
 *
 * @returns The parsed row plus related listing URLs, or null when the URL is not a listing
 *          of one of the requested ecosystems.
 * @throws Whatever scrapeMarkdown throws when the page cannot be scraped.
 */
async function extractApp(url: string): Promise<{ row: AppRow; related: string[] } | null> {
  const ecosystem = ecosystemForUrl(url);
  if (!ecosystem || !ecosystems.includes(ecosystem) || !isListingUrl(url, ecosystem)) {
    return null;
  }
  const { markdown, metadata } = await scrapeMarkdown(url);

  let row: AppRow;
  switch (ecosystem) {
    case "chrome":
      row = parseChrome(markdown, metadata, url);
      break;
    case "shopify":
      row = parseShopify(markdown, metadata, url);
      break;
    default:
      row = parseStripe(markdown, metadata, url);
  }

  // Secondary sources: the distinct, non-empty developer/support/privacy links, in that order.
  const secondary = new Set<string>();
  for (const candidate of [row.developer_url, row.developer_website, row.support_url, row.privacy_url]) {
    if (candidate) secondary.add(candidate);
  }
  row.secondary_source_urls = Array.from(secondary);

  if (includeReviews && !row.reviews) {
    row.reviews = [];
  }
  return { row, related: relatedListingUrls(markdown, ecosystem) };
}
/**
 * Entry point: discovers listing URLs, extracts up to `maxApps` rows and writes the snapshot
 * to stdout as JSON, either flat or grouped per ecosystem depending on --output-mode.
 *
 * A URL that fails to extract is logged to stderr and skipped.
 *
 * @throws Error when no listing at all could be extracted.
 */
async function main() {
  const pending = await discoverListingUrls();
  const known = new Set(pending);
  const rows: AppRow[] = [];

  while (pending.length > 0 && rows.length < maxApps) {
    const url = pending.shift() as string;
    try {
      const result = await extractApp(url);
      if (!result) continue;
      rows.push(result.row);
      // Queue related listings only while collected rows plus queued URLs stay below the cap.
      for (const related of result.related) {
        if (rows.length + pending.length >= maxApps) break;
        if (known.has(related)) continue;
        known.add(related);
        pending.push(related);
      }
    } catch (error) {
      console.error(`Skipping ${url}: ${error instanceof Error ? error.message : String(error)}`);
    }
  }

  if (rows.length === 0) {
    throw new Error("no official app listings could be extracted for the requested inputs");
  }

  const header = { snapshot_label: snapshotLabel, collected_at: collectedAt };
  const payload =
    outputMode === "grouped_by_ecosystem"
      ? {
          ...header,
          ecosystems: ecosystems.map((ecosystem) => ({
            ecosystem,
            apps: rows.filter((row) => row.ecosystem === ecosystem),
          })),
        }
      : { ...header, apps: rows };
  process.stdout.write(JSON.stringify(payload));
}

// Any failure that reaches this point is logged and turned into exit status 1.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
Deploy this collector to unlock schedules, the API endpoint, and destinations.