Public Case Study ROI Extractor
v1 · Published — Extracts compact proof points and visible ROI metrics from official vendor case study and customer story pages.
Output & API
Preview the latest data, download it, or call this collector as an API.
| # | notes | use_case | confidence | vendor_name | customer_name | vendor_domain | case_study_url | numeric_results | outcome_summary | source_page_url | case_study_title | customer_industry | customer_size_or_type |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | null | Automating customer support inquiries | high | HubSpot | Sticos | hubspot.com | https://www.hubspot.com/case-studies/sticos |  | Automated 41% of incoming support inquiries, giving accountants and auditors instant help while freeing the team to shift from reactive tickets to proactive customer success. | https://www.hubspot.com/case-studies | Sticos | Professional Services | null |
| 1 | null | Creating a new admissions blog | high | HubSpot | Morehouse College | hubspot.com | https://www.hubspot.com/case-studies/morehouse-college |  | Built a new admissions blog which resulted in faster updates, higher engagement, and a consistent brand voice across every page. | https://www.hubspot.com/case-studies | Morehouse | Education | null |
| 2 | null | Unified customer data management | high | HubSpot | Motorola Solutions | hubspot.com | https://www.hubspot.com/case-studies/motorola-solutions | [] | Unified 123,000+ customer records, providing real-time access to trusted data and uncovering cross-sell opportunities that generated millions in revenue. | https://www.hubspot.com/case-studies | Motorola Solutions | Software & Technology | null |
Marketplace
Publish this collector so others can deploy it — you keep ownership.
Versions
Every build and self-heal appends a version. Pin one to lock runs to it.
import Firecrawl from "@mendable/firecrawl-js";
import { parseArgs } from "node:util";
// The Firecrawl key is read from the environment. Without it neither the search
// nor the scrape calls below can run, so report on stderr and exit with status 1.
const apiKey = process.env.FIRECRAWL_API_KEY;
if (apiKey === undefined || apiKey === "") {
  console.error("FIRECRAWL_API_KEY is not set");
  process.exit(1);
}

// Single client instance shared by every search and scrape call in this script.
const firecrawl = new Firecrawl({ apiKey });
// Number of search results requested (and inspected) per vendor during discovery.
const SEARCH_LIMIT_PER_VENDOR = 4;
// Maximum number of distinct source pages scraped for one vendor.
const SOURCE_PAGE_LIMIT_PER_VENDOR = 3;
// Maximum case-study entries kept from one source page; also used as the schema's
// `maxItems` and quoted in the extraction prompt.
const CASE_STUDY_LIMIT_PER_SOURCE = 8;
// Hard upper bound applied to --max-companies regardless of the requested value.
const MAX_COMPANY_CAP = 10;
// Command-line flags. All three are declared as string options and parsed with
// `strict: true`.
const { values: flags } = parseArgs({
  options: {
    "seed-urls": { type: "string" },
    "max-companies": { type: "string" },
    "output-mode": { type: "string" },
  },
  strict: true,
});

// --seed-urls is the only mandatory flag; a missing or empty value ends the process.
if (flags["seed-urls"] === undefined || flags["seed-urls"] === "") {
  console.error("--seed-urls is required");
  process.exit(1);
}

// --output-mode defaults to flat rows; anything other than the two known modes is rejected.
const outputMode = flags["output-mode"] ?? "case_study_rows";
switch (outputMode) {
  case "case_study_rows":
  case "grouped_by_company":
    break;
  default:
    throw new Error("OUT_OF_SCOPE: --output-mode must be case_study_rows or grouped_by_company");
}

// --max-companies defaults to 1, must be a finite number >= 1, and is floored and
// then clamped to MAX_COMPANY_CAP.
const requestedMaxCompanies = Number(flags["max-companies"] ?? "1");
if (!(Number.isFinite(requestedMaxCompanies) && requestedMaxCompanies >= 1)) {
  throw new Error("OUT_OF_SCOPE: --max-companies must be a positive number");
}
const maxCompanies = Math.min(Math.floor(requestedMaxCompanies), MAX_COMPANY_CAP);
/**
 * Normalizes one raw seed entry into an absolute http(s) URL string.
 *
 * Surrounding whitespace is trimmed, `https://` is prepended when the entry has
 * no leading scheme, and any `#fragment` is removed.
 *
 * @param input - One raw entry, e.g. `hubspot.com/case-studies`.
 * @returns The serialized URL, or `null` when the entry is blank, cannot be
 *   parsed, or uses a protocol other than http/https.
 */
function normalizeSeedUrl(input: string): string | null {
  const trimmed = input.trim();
  if (!trimmed) return null;

  // Only a *leading* `scheme://` counts as an explicit protocol. A plain substring
  // check would also fire on "://" inside a query string (for example
  // `example.com/r?next=http://x`) and wrongly skip the https:// default.
  const hasLeadingScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(trimmed);
  const withProtocol = hasLeadingScheme ? trimmed : `https://${trimmed}`;

  try {
    const url = new URL(withProtocol);
    if (url.protocol !== "http:" && url.protocol !== "https:") return null;
    url.hash = "";
    return url.toString();
  } catch {
    // `new URL` throws on unparsable input; treat that as "not a usable seed".
    return null;
  }
}
/**
 * Returns the lower-cased hostname of `url` with a single leading `www.` removed.
 * Throws whatever `new URL` throws when `url` cannot be parsed.
 */
function hostnameFromUrl(url: string): string {
  const { hostname } = new URL(url);
  return hostname.toLowerCase().replace(/^www\./, "");
}
/**
 * Reports whether `candidateUrl` is hosted on `vendorDomain` or one of its
 * subdomains. Unparsable candidate URLs count as a non-match.
 */
function domainMatches(candidateUrl: string, vendorDomain: string): boolean {
  let candidateHost: string;
  try {
    candidateHost = hostnameFromUrl(candidateUrl);
  } catch {
    return false;
  }
  if (candidateHost === vendorDomain) return true;
  // The leading dot keeps "notvendor.com" from matching "vendor.com".
  return candidateHost.endsWith(`.${vendorDomain}`);
}
/**
 * Strips the fragment and tracking query parameters from `url`.
 *
 * Removed parameters (matched case-insensitively): anything starting with
 * `utm_`, plus `hubs_content`, `hubs_content-cta`, `ref` and `source`.
 * Throws when `url` cannot be parsed.
 */
function cleanUrl(url: string): string {
  const parsed = new URL(url);
  parsed.hash = "";

  const exactTrackingNames = new Set(["hubs_content", "hubs_content-cta", "ref", "source"]);

  // Collect first, delete afterwards, so the parameter list is not modified
  // while it is being walked.
  const doomed = new Set<string>();
  parsed.searchParams.forEach((_value, name) => {
    const lowered = name.toLowerCase();
    if (lowered.startsWith("utm_") || exactTrackingNames.has(lowered)) {
      doomed.add(name);
    }
  });
  doomed.forEach((name) => parsed.searchParams.delete(name));

  return parsed.toString();
}
/**
 * Heuristic check for whether a page looks like a case-study / customer-story page.
 *
 * Each of `url`, `title` and `description` is lower-cased and has runs of
 * whitespace or underscores turned into hyphens before it is searched for the
 * signal slugs. Without that step a title such as "Customer Stories" or a path
 * such as `/success_stories` could never match the hyphenated signals.
 * Fields are checked one at a time so a match cannot straddle two fields.
 *
 * @param url - Page URL.
 * @param title - Optional page title (e.g. from a search result).
 * @param description - Optional page description or snippet.
 * @returns `true` when any field contains one of the signals.
 */
function hasCaseStudySignal(url: string, title?: string, description?: string): boolean {
  const signals = [
    "case-study",
    "case-studies",
    "customer-story",
    "customer-stories",
    "success-story",
    "success-stories",
    "customers",
    "testimonial",
    "testimonials",
  ];
  const fields: unknown[] = [url, title, description];
  return fields.some((field) => {
    // Search results are external data; ignore anything that is not text.
    if (typeof field !== "string" || field === "") return false;
    const slug = field.toLowerCase().replace(/[\s_]+/g, "-");
    return signals.some((signal) => slug.includes(signal));
  });
}
/**
 * Builds a fallback display name from a domain: the text before the first dot
 * with its first character upper-cased. When that leading label is empty the
 * domain is returned unchanged.
 */
function deriveVendorName(domain: string): string {
  const [leadingLabel = domain] = domain.split(".");
  if (leadingLabel.length === 0) return domain;
  return leadingLabel.slice(0, 1).toUpperCase() + leadingLabel.slice(1);
}
/**
 * Collapses a possibly multi-line string into one line.
 *
 * Each line is trimmed, empty lines are dropped, and the rest are joined with a
 * single space. Non-strings, empty results, and the placeholder values "null",
 * "n/a", "not visible" and "not specified" (any letter case) become `null`.
 */
function compactText(value: unknown): string | null {
  if (typeof value !== "string") return null;

  const pieces: string[] = [];
  for (const line of value.split("\n")) {
    const piece = line.trim();
    if (piece) pieces.push(piece);
  }
  const compacted = pieces.join(" ");

  const placeholders = ["null", "n/a", "not visible", "not specified"];
  if (compacted === "" || placeholders.includes(compacted.toLowerCase())) return null;
  return compacted;
}
/**
 * Cleans an untrusted value into a list of at most eight non-empty strings.
 * Non-array input yields an empty list; entries that `compactText` rejects are dropped.
 */
function normalizeStringArray(value: unknown): string[] {
  if (!Array.isArray(value)) return [];

  const kept: string[] = [];
  for (const entry of value) {
    const text = compactText(entry);
    if (text) kept.push(text);
    // Same cap as slicing the filtered list to its first eight entries.
    if (kept.length === 8) break;
  }
  return kept;
}
/**
 * Coerces an untrusted value into a confidence level. Anything that does not
 * clean up to "high", "medium" or "low" (case-insensitive) becomes "medium".
 */
function confidenceValue(value: unknown): "high" | "medium" | "low" {
  switch (compactText(value)?.toLowerCase()) {
    case "high":
      return "high";
    case "low":
      return "low";
    default:
      return "medium";
  }
}
/**
 * Builds the identity used to drop duplicate rows: cleaned case-study URL,
 * customer name and title, each lower-cased and joined with "|". Missing parts
 * contribute an empty string.
 */
function dedupeKey(row: CaseStudyRow): string {
  const parts = [
    row.case_study_url ? cleanUrl(row.case_study_url) : "",
    row.customer_name ?? "",
    row.case_study_title ?? "",
  ];
  return parts.map((part) => part.toLowerCase()).join("|");
}
// One extracted case study, as emitted in the script's JSON output.
// Nullable text fields are null when the extraction returned nothing usable
// (missing, empty, or a placeholder such as "n/a").
type CaseStudyRow = {
// Vendor name reported by the extraction, or a name derived from the domain.
vendor_name: string;
// Hostname of the seed URL, lower-cased with a leading "www." removed.
vendor_domain: string;
// The page that was scraped to produce this row.
source_page_url: string;
case_study_title: string | null;
customer_name: string | null;
customer_industry: string | null;
customer_size_or_type: string | null;
use_case: string | null;
outcome_summary: string | null;
// Up to eight cleaned metric strings; empty when no numeric metric was extracted.
numeric_results: string[];
// Cleaned link on the vendor's domain when one was extracted; otherwise the
// extraction code falls back to source_page_url.
case_study_url: string | null;
// Defaults to "medium" when the extracted value is not one of the three levels.
confidence: "high" | "medium" | "low";
notes: string | null;
};
// One vendor to collect case studies for.
type VendorInput = {
// Normalized absolute http(s) seed URL taken from --seed-urls.
seedUrl: string;
// Hostname of seedUrl, lower-cased with a leading "www." removed.
vendorDomain: string;
};
// Turn --seed-urls (comma-separated) into the list of vendors to process.
//
// Seeds are normalized, invalid entries are dropped, and only the FIRST seed per
// vendor domain is kept. Without that de-duplication two seeds on the same domain
// would be collected as two separate vendors: the flat output would repeat rows,
// and grouped_by_company would emit two identical groups that each contain both
// runs' rows. De-duplicating before the cap also makes --max-companies count
// distinct companies rather than raw seed entries.
const seenVendorDomains = new Set<string>();
const seedUrls = String(flags["seed-urls"])
  .split(",")
  .map((entry) => normalizeSeedUrl(entry))
  .filter((url): url is string => Boolean(url))
  .filter((url) => {
    const domain = hostnameFromUrl(url);
    if (seenVendorDomains.has(domain)) return false;
    seenVendorDomains.add(domain);
    return true;
  })
  .slice(0, maxCompanies);

if (seedUrls.length === 0) {
  throw new Error("OUT_OF_SCOPE: --seed-urls must contain at least one valid http or https URL");
}

const vendors: VendorInput[] = seedUrls.map((seedUrl) => ({
  seedUrl,
  vendorDomain: hostnameFromUrl(seedUrl),
}));
/**
 * Builds the JSON-schema object handed to the scrape call's `json` format.
 *
 * The result describes `{ vendor_name, case_studies[] }`, where every
 * case-study property is required and the array is capped at
 * CASE_STUDY_LIMIT_PER_SOURCE items. A fresh object is built on every call.
 */
function extractionSchema() {
  // Fresh object per property so no two schema nodes share a reference.
  const nullableString = () => ({ type: ["string", "null"] });

  const caseStudyProperties = {
    case_study_title: nullableString(),
    customer_name: nullableString(),
    customer_industry: nullableString(),
    customer_size_or_type: nullableString(),
    use_case: nullableString(),
    outcome_summary: nullableString(),
    numeric_results: { type: "array", items: { type: "string" } },
    case_study_url: nullableString(),
    confidence: { type: "string", enum: ["high", "medium", "low"] },
    notes: nullableString(),
  };

  return {
    type: "object",
    properties: {
      vendor_name: nullableString(),
      case_studies: {
        type: "array",
        maxItems: CASE_STUDY_LIMIT_PER_SOURCE,
        items: {
          type: "object",
          properties: caseStudyProperties,
          // Every declared property is required, in declaration order.
          required: Object.keys(caseStudyProperties),
        },
      },
    },
    required: ["vendor_name", "case_studies"],
  };
}
/**
 * Finds up to SOURCE_PAGE_LIMIT_PER_VENDOR pages on the vendor's domain that
 * look like case-study listings or stories.
 *
 * Candidates, in order: the seed URL (when it carries a case-study signal),
 * then `site:`-restricted search hits that are on the vendor's domain and carry
 * a signal. If nothing qualifies, the seed URL is used on its own. URLs are
 * cleaned and de-duplicated case-insensitively, keeping the first occurrence.
 */
async function discoverSourcePages(vendor: VendorInput): Promise<string[]> {
  const found: string[] = hasCaseStudySignal(vendor.seedUrl) ? [cleanUrl(vendor.seedUrl)] : [];

  const searchResults = await firecrawl.search(
    `site:${vendor.vendorDomain} case studies OR customer stories OR success stories OR testimonials`,
    {
      limit: SEARCH_LIMIT_PER_VENDOR,
      integration: "prometheus",
    },
  );

  const hits = Array.isArray(searchResults.web)
    ? searchResults.web.slice(0, SEARCH_LIMIT_PER_VENDOR)
    : [];
  for (const hit of hits) {
    const hitUrl = typeof hit.url === "string" ? hit.url : "";
    const relevant =
      hitUrl !== "" &&
      domainMatches(hitUrl, vendor.vendorDomain) &&
      hasCaseStudySignal(hitUrl, hit.title, hit.description);
    if (relevant) found.push(cleanUrl(hitUrl));
  }

  // Nothing looked like a case-study page: fall back to the seed itself.
  if (found.length === 0) found.push(cleanUrl(vendor.seedUrl));

  // A Map keeps insertion order, so the first spelling of each URL wins.
  const firstByLowercase = new Map<string, string>();
  for (const page of found) {
    const key = page.toLowerCase();
    if (!firstByLowercase.has(key)) firstByLowercase.set(key, page);
  }
  return Array.from(firstByLowercase.values()).slice(0, SOURCE_PAGE_LIMIT_PER_VENDOR);
}
/**
 * Picks the URL to record for one extracted case study.
 *
 * Path-style references (`/x`, `./x`, `../x`, `//host/x`) are resolved against
 * the page they were found on; previously they failed the domain check and were
 * silently replaced by the listing page URL. The result is kept only when it is
 * on the vendor's domain; in every other case the source page URL is returned.
 */
function resolveCaseStudyUrl(rawUrl: string | null, sourcePageUrl: string, vendorDomain: string): string {
  if (!rawUrl) return sourcePageUrl;

  let candidate = rawUrl;
  // Only strings that look like a path are resolved, so free text returned by the
  // extraction is not turned into a bogus URL under the source page.
  if (/^\.{0,2}\/\S*$/.test(rawUrl)) {
    try {
      candidate = new URL(rawUrl, sourcePageUrl).toString();
    } catch {
      return sourcePageUrl;
    }
  }
  return domainMatches(candidate, vendorDomain) ? cleanUrl(candidate) : sourcePageUrl;
}

/**
 * Scrapes one source page with a structured-extraction prompt and converts the
 * result into cleaned `CaseStudyRow`s.
 *
 * @param vendor - Vendor the page belongs to; supplies the domain and fallback name.
 * @param sourcePageUrl - Page to scrape; recorded on every returned row.
 * @returns At most CASE_STUDY_LIMIT_PER_SOURCE rows. Rows with no title, no
 *   customer name and no outcome summary are dropped.
 * @throws Whatever the scrape call throws; callers decide whether to skip the page.
 */
async function extractFromSourcePage(vendor: VendorInput, sourcePageUrl: string): Promise<CaseStudyRow[]> {
  const prompt = [
    "Extract only visible official customer case study, customer story, success story, or testimonial entries from this vendor-owned page.",
    "Do not use third-party review snippets, generic blog commentary, or reposted stories.",
    "Do not invent ROI numbers, industries, sizes, or customer details; use null for missing text and an empty numeric_results array when no numeric metric is visible.",
    `Return at most ${CASE_STUDY_LIMIT_PER_SOURCE} compact entries with quoted outcomes, ROI statements, and numeric metrics when visible.`,
  ].join(" ");

  const doc = await firecrawl.scrape(sourcePageUrl, {
    formats: [{ type: "json", schema: extractionSchema(), prompt }],
    onlyMainContent: true,
    integration: "prometheus",
  });

  // The extraction payload is untrusted: treat anything that is not an object as empty.
  const extracted = doc.json && typeof doc.json === "object" ? doc.json as Record<string, unknown> : {};
  const vendorName = compactText(extracted.vendor_name) ?? deriveVendorName(vendor.vendorDomain);
  const rawStudies = Array.isArray(extracted.case_studies) ? extracted.case_studies : [];

  return rawStudies
    .slice(0, CASE_STUDY_LIMIT_PER_SOURCE)
    .map((item): CaseStudyRow => {
      const study = item && typeof item === "object" ? item as Record<string, unknown> : {};
      return {
        vendor_name: vendorName,
        vendor_domain: vendor.vendorDomain,
        source_page_url: sourcePageUrl,
        case_study_title: compactText(study.case_study_title),
        customer_name: compactText(study.customer_name),
        customer_industry: compactText(study.customer_industry),
        customer_size_or_type: compactText(study.customer_size_or_type),
        use_case: compactText(study.use_case),
        outcome_summary: compactText(study.outcome_summary),
        numeric_results: normalizeStringArray(study.numeric_results),
        case_study_url: resolveCaseStudyUrl(
          compactText(study.case_study_url),
          sourcePageUrl,
          vendor.vendorDomain,
        ),
        confidence: confidenceValue(study.confidence),
        notes: compactText(study.notes),
      };
    })
    // A row must identify itself by at least a title, a customer, or an outcome.
    .filter((row) => row.case_study_title || row.customer_name || row.outcome_summary);
}
/**
 * Collects de-duplicated case-study rows for one vendor.
 *
 * Source pages are discovered, then scraped one after another. A page whose
 * extraction throws is logged to stderr and skipped. Duplicates (by
 * `dedupeKey`) are removed, keeping the first occurrence.
 */
async function collectVendor(vendor: VendorInput): Promise<CaseStudyRow[]> {
  const pages = (await discoverSourcePages(vendor)).slice(0, SOURCE_PAGE_LIMIT_PER_VENDOR);

  const gathered: CaseStudyRow[] = [];
  for (const page of pages) {
    try {
      gathered.push(...(await extractFromSourcePage(vendor, page)));
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      console.error(`Skipping ${page}: ${reason}`);
    }
  }

  // Map insertion order preserves the original row order of first occurrences.
  const firstByKey = new Map<string, CaseStudyRow>();
  for (const row of gathered) {
    const key = dedupeKey(row);
    if (!firstByKey.has(key)) firstByKey.set(key, row);
  }
  return Array.from(firstByKey.values());
}
/**
 * Entry point: collects rows for each vendor in turn and writes one JSON
 * document to stdout. The output is either the flat row list or, in
 * `grouped_by_company` mode, one object per vendor with its rows nested under
 * `case_studies` (without the per-row vendor fields).
 */
async function main() {
  const activeVendors = vendors.slice(0, maxCompanies);

  const allRows: CaseStudyRow[] = [];
  for (const vendor of activeVendors) {
    allRows.push(...(await collectVendor(vendor)));
  }

  if (outputMode !== "grouped_by_company") {
    process.stdout.write(JSON.stringify(allRows));
    return;
  }

  // Bucket rows by vendor domain once instead of re-scanning the list per vendor.
  const rowsByDomain = new Map<string, CaseStudyRow[]>();
  for (const row of allRows) {
    const bucket = rowsByDomain.get(row.vendor_domain);
    if (bucket) {
      bucket.push(row);
    } else {
      rowsByDomain.set(row.vendor_domain, [row]);
    }
  }

  const grouped = activeVendors.map((vendor) => {
    const vendorRows = rowsByDomain.get(vendor.vendorDomain) ?? [];
    const firstRow = vendorRows[0];
    return {
      // Use the extracted name when any row exists; otherwise derive one from the domain.
      vendor_name: firstRow ? firstRow.vendor_name : deriveVendorName(vendor.vendorDomain),
      vendor_domain: vendor.vendorDomain,
      case_studies: vendorRows.map(({ vendor_name, vendor_domain, ...study }) => study),
    };
  });
  process.stdout.write(JSON.stringify(grouped));
}

// Any unhandled failure is printed to stderr and turns into exit status 1.
main().catch((failure) => {
  console.error(failure);
  process.exit(1);
});
Deploy this collector to unlock schedules, the API endpoint, and destinations.