Event Sponsor And Exhibitor Prospects
v1 · Published · Official event-domain sponsor, partner, and exhibitor records for B2B prospecting.
Output & API
Preview the latest data, download it, or call this collector as an API.
| # | notes | topic | event_url | confidence | event_city | event_name | event_type | event_region | sponsor_name | event_location | company_website | event_date_text | source_page_url | last_verified_at | sponsorship_tier | relationship_type | tier_evidence_text | secondary_source_urls | exhibitor_or_profile_url |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | null | AI | https://ainextconference.com | 0.9 | Las Vegas | AINext Conference | Conference | NV | EXL | Las Vegas | null | 8 April 2027 | https://ainextconference.com/ | 2026-07-02T11:41:48.961Z | null | media partner | null | [] | https://ainextconference.com/media-partners/ |
| 1 | null | AI | https://ainextconference.com | 0.9 | Las Vegas | AINext Conference | Conference | NV | Reckonsys Tech Labs | Las Vegas | null | 8 April 2027 | https://ainextconference.com/ | 2026-07-02T11:41:48.961Z | null | partner | null | [] | null |
| 2 | null | AI | https://ainextconference.com | 0.8 | Las Vegas | AINext Conference | Conference | NV | Xela | Las Vegas | null | 8 April 2027 | https://ainextconference.com/ | 2026-07-02T11:41:48.961Z | null | exhibitor | null | [] | null |
Marketplace
Publish this collector so others can deploy it — you keep ownership.
Versions
Every build and self-heal appends a version. Pin one to lock runs to it.
import Firecrawl from "@mendable/firecrawl-js";
import * as cheerio from "cheerio";
import { parseArgs } from "node:util";
// Fail fast when the Firecrawl credential is missing: every search and scrape
// call below goes through the client created here.
const apiKey = process.env.FIRECRAWL_API_KEY;
if (apiKey === undefined || apiKey === "") {
  console.error("FIRECRAWL_API_KEY is not set");
  process.exit(1);
}
// Single Firecrawl client shared by the whole script.
const firecrawl = new Firecrawl({ apiKey });
// Parse the command-line flags. Every option is declared as a string; the
// numeric and boolean ones are converted and validated by the helpers used
// further down (parsePositiveInteger / parseBooleanFlag).
// `strict: true` asks parseArgs to reject arguments that are not declared here.
const { values: flags } = parseArgs({
strict: true,
options: {
topic: { type: "string" },
"seed-sites": { type: "string" },
"max-events": { type: "string" },
"max-pages-per-event": { type: "string" },
"output-mode": { type: "string" },
region: { type: "string" },
city: { type: "string" },
"date-range": { type: "string" },
"event-type": { type: "string" },
language: { type: "string" },
"include-exhibitors": { type: "string" },
},
});
// Normalised flag values. cleanString turns a missing flag into "", so the
// `||` fallbacks below supply the defaults for optional filters.
const topic = cleanString(flags.topic);
// Comma-separated list of seed sites (split in parseSeedSites).
const seedSitesText = cleanString(flags["seed-sites"]);
// "global" means "no region filter": it is left out of the search query and of row defaults.
const region = cleanString(flags.region) || "global";
const city = cleanString(flags.city) || "";
const dateRange = cleanString(flags["date-range"]) || "upcoming 12 months";
const eventType = cleanString(flags["event-type"]) || "conference";
const language = cleanString(flags.language) || "English";
// Defaults to true; anything other than the literal strings "true"/"false" throws.
const includeExhibitors = parseBooleanFlag(cleanString(flags["include-exhibitors"]) || "true", "include-exhibitors");
// Validated below: must be "sponsor_rows" or "grouped_by_event".
const outputMode = cleanString(flags["output-mode"]);
// Both limits are required and capped at 50 (parsePositiveInteger exits or throws otherwise).
const maxEvents = parsePositiveInteger(flags["max-events"], "max-events", 50);
const maxPagesPerEvent = parsePositiveInteger(flags["max-pages-per-event"], "max-pages-per-event", 50);
// Events can only be located through a topic search or explicit seed sites,
// so at least one of the two must be present.
if (!(topic || seedSitesText)) {
  throw new Error("OUT_OF_SCOPE: provide at least one of --topic or --seed-sites");
}
// The output mode is mandatory and restricted to the two supported shapes.
if (!outputMode) {
  console.error("--output-mode is required");
  process.exit(1);
}
if (outputMode !== "sponsor_rows" && outputMode !== "grouped_by_event") {
  throw new Error("OUT_OF_SCOPE: --output-mode must be sponsor_rows or grouped_by_event");
}
// Presence checks for the numeric limits. parsePositiveInteger already exits
// when these flags are missing, so this is a second line of defence.
for (const requiredFlag of ["max-events", "max-pages-per-event"] as const) {
  if (!flags[requiredFlag]) {
    console.error(`--${requiredFlag} is required`);
    process.exit(1);
  }
}
// Terms that are OR-ed together into the `site:` search query that
// findCandidatePages uses to look for sponsor/partner/exhibitor pages.
const sponsorPageTerms = [
"sponsor",
"sponsors",
"partner",
"partners",
"exhibitor",
"exhibitors",
"expo",
"companies",
"showcase",
"supporters",
"media partners",
"startup partners",
"community partners",
];
// Guessed paths that findCandidatePages joins onto each event origin as
// candidate sponsor/partner/exhibitor pages, in the order listed here.
const likelyPaths = [
"/sponsors",
"/sponsor",
"/partners",
"/partner",
"/sponsors-partners",
"/sponsors-and-partners",
"/partners-and-sponsors",
"/exhibitors",
"/exhibitor",
"/exhibitor-list",
"/exhibitors-list",
"/sponsor-list",
"/expo",
"/exhibition",
"/showcase",
"/supporters",
"/media-partners",
"/startup-partners",
"/community-partners",
"/sponsorship",
"/sponsor-us",
];
// Hosts that isAggregatorHost uses to exclude search results from event
// discovery (listing sites, ticketing platforms, social networks, Wikipedia).
// Entries ending in "." (e.g. "eventbrite.") presumably stand for every TLD
// variant of that brand — confirm before changing the matching rules.
const aggregatorHosts = [
"10times.com",
"allevents.in",
"conferencealerts.com",
"eventbrite.",
"events.com",
"facebook.com",
"instagram.com",
"linkedin.com",
"meetup.com",
"ticketmaster.",
"twitter.com",
"wikipedia.org",
"x.com",
"youtube.com",
];
// JSON schema handed to Firecrawl's "json" format in scrapeForExtraction.
// It asks for page-level event metadata plus a `sponsors` array; every field
// is required, and the nullable ones accept null when the page gives no value.
const extractionSchema = {
type: "object",
additionalProperties: false,
properties: {
event_name: { type: ["string", "null"] },
event_location: { type: ["string", "null"] },
event_city: { type: ["string", "null"] },
event_region: { type: ["string", "null"] },
event_date_text: { type: ["string", "null"] },
event_type: { type: ["string", "null"] },
sponsors: {
type: "array",
items: {
type: "object",
additionalProperties: false,
properties: {
sponsor_name: { type: "string" },
sponsorship_tier: { type: ["string", "null"] },
relationship_type: { type: ["string", "null"] },
exhibitor_or_profile_url: { type: ["string", "null"] },
company_website: { type: ["string", "null"] },
tier_evidence_text: { type: ["string", "null"] },
notes: { type: ["string", "null"] },
// processEvent clamps this to [0, 1] and drops rows below 0.55.
confidence: { type: "number" },
},
required: [
"sponsor_name",
"sponsorship_tier",
"relationship_type",
"exhibitor_or_profile_url",
"company_website",
"tier_evidence_text",
"notes",
"confidence",
],
},
},
},
required: [
"event_name",
"event_location",
"event_city",
"event_region",
"event_date_text",
"event_type",
"sponsors",
],
};
/**
 * Runs the whole collection: resolves the event list (seed sites first, then
 * search-based discovery for the remaining slots), extracts sponsor rows for
 * each event one after another, stamps every row with a single verification
 * timestamp, and writes the result to stdout as JSON.
 *
 * @throws Error when no event domain could be resolved at all.
 */
async function main() {
  const seeded = parseSeedSites(seedSitesText).slice(0, maxEvents);
  const openSlots = maxEvents - seeded.length;
  const discovered = openSlots <= 0 ? [] : await discoverEvents(openSlots);
  const events = dedupeEvents([...seeded, ...discovered]).slice(0, maxEvents);
  if (events.length === 0) {
    throw new Error("no official event domains found for the supplied filters");
  }

  const collected: SponsorRow[] = [];
  for (const event of events) {
    // Progress goes to stderr so stdout stays pure JSON.
    console.error(`Processing ${event.url}`);
    collected.push(...(await processEvent(event)));
  }

  const verifiedAt = new Date().toISOString();
  const stamped = collected.map((row) => ({ ...row, last_verified_at: verifiedAt }));
  const payload = outputMode === "grouped_by_event" ? groupByEvent(stamped) : stamped;
  process.stdout.write(JSON.stringify(payload));
}
/**
 * Searches the web for official event sites that match the CLI filters.
 *
 * Results are reduced to one candidate per host (the site origin), skipping
 * aggregator/social hosts and results that do not look like an event.
 * When a topic is set, results that mention it are preferred; results that do
 * not are only used to fill slots the on-topic results leave empty.
 * (Previously the topic check was guarded by `candidates.length >= limit`,
 * which can never hold inside the loop, so it had no effect.)
 *
 * @param limit Maximum number of candidates to return; `<= 0` returns `[]`.
 * @returns Up to `limit` candidates, on-topic ones first, each in search order.
 */
async function discoverEvents(limit: number): Promise<EventCandidate[]> {
  if (limit <= 0) return [];
  const locationParts = [city, region !== "global" ? region : ""].filter(Boolean).join(" ");
  const query = [
    "official event website",
    topic,
    eventType,
    locationParts,
    dateRange,
    "sponsors partners exhibitors",
    language,
  ]
    .filter(Boolean)
    .join(" ");
  // Over-fetch: many results are filtered out below.
  const searchResponse = await firecrawl.search(query, {
    limit: Math.max(10, limit * 5),
    integration: "prometheus",
  } as any);

  const onTopic: EventCandidate[] = [];
  const offTopic: EventCandidate[] = [];
  const onTopicHosts = new Set<string>();
  const offTopicHosts = new Set<string>();

  for (const result of getSearchResults(searchResponse)) {
    const url = normalizeUrl(getResultUrl(result));
    if (!url) continue;
    const origin = getOrigin(url);
    if (!origin) continue;
    const host = getHost(origin);
    if (!host || onTopicHosts.has(host) || isAggregatorHost(host)) continue;
    const title = getResultTitle(result);
    const text = `${title} ${getResultDescription(result)} ${url}`.toLowerCase();
    if (!looksLikeEventResult(text)) continue;

    const candidate: EventCandidate = {
      url: origin,
      host,
      event_name: cleanString(title) || null,
    };
    if (!topic || looselyMentions(text, topic)) {
      onTopicHosts.add(host);
      onTopic.push(candidate);
      if (onTopic.length >= limit) break;
    } else if (!offTopicHosts.has(host)) {
      // Keep as a fallback; a later on-topic result for the same host wins.
      offTopicHosts.add(host);
      offTopic.push(candidate);
    }
  }

  const fallback = offTopic.filter((candidate) => !onTopicHosts.has(candidate.host));
  return [...onTopic, ...fallback].slice(0, limit);
}
/**
 * Extracts sponsor/partner/exhibitor rows for one event.
 *
 * Every candidate page is scraped with the structured-extraction prompt; when
 * that yields no sponsors but HTML is available, the cheerio-based fallback
 * is used. Rows are merged per normalised sponsor name across pages, and the
 * additional pages a sponsor was seen on end up in `secondary_source_urls`.
 *
 * Fixes over the previous version:
 * - an explicit confidence of 0 is respected (`??` instead of `||`, which
 *   turned 0 into the 0.7 default and let the row pass the threshold);
 * - `--include-exhibitors false` also drops rows whose exhibitor relationship
 *   is only inferred from the sponsorship tier.
 *
 * @param event Event site to process.
 * @returns One row per distinct sponsor; `last_verified_at` is left empty here.
 */
async function processEvent(event: EventCandidate): Promise<SponsorRow[]> {
  const candidateUrls = await findCandidatePages(event);
  const rowsByName = new Map<string, SponsorAccumulator>();
  for (const pageUrl of candidateUrls) {
    let doc: any;
    try {
      doc = await scrapeForExtraction(pageUrl);
    } catch (err) {
      // A single failing page must not abort the whole event.
      console.error(`Extraction scrape failed for ${pageUrl}: ${String(err)}`);
      continue;
    }
    // Prefer the URL the scraper reports (it may differ after redirects).
    const pageSourceUrl = normalizeUrl(doc?.metadata?.sourceURL || doc?.url || pageUrl) || pageUrl;
    const json = doc?.json || {};
    let sponsors = Array.isArray(json.sponsors) ? json.sponsors : [];
    if (sponsors.length === 0 && doc?.html) {
      sponsors = extractSponsorsFromHtml(doc.html, pageSourceUrl, event.host);
    }
    for (const sponsor of sponsors) {
      const sponsorName = cleanString(sponsor?.sponsor_name);
      if (!sponsorName || sponsorName.length < 2) continue;
      const tier = cleanString(sponsor?.sponsorship_tier);
      // Effective relationship: explicit value first, otherwise inferred from the tier label.
      const relationshipType = cleanString(sponsor?.relationship_type) || inferRelationshipType(tier);
      if (!includeExhibitors && relationshipType && relationshipType.toLowerCase().includes("exhibitor")) continue;
      // Missing confidence defaults to 0.7; a reported 0 stays 0 and is filtered out.
      const confidence = clampConfidence(Number(sponsor?.confidence ?? 0.7));
      if (confidence < 0.55) continue;
      const row: SponsorRow = {
        event_name: cleanString(json.event_name) || event.event_name || hostToName(event.host),
        event_url: event.url,
        event_location: cleanString(json.event_location) || null,
        event_city: cleanString(json.event_city) || city || null,
        event_region: cleanString(json.event_region) || (region !== "global" ? region : null),
        event_date_text: cleanString(json.event_date_text) || null,
        event_type: cleanString(json.event_type) || eventType,
        topic: topic || null,
        sponsor_name: sponsorName,
        sponsorship_tier: tier || null,
        relationship_type: relationshipType,
        exhibitor_or_profile_url: absolutizeUrl(cleanString(sponsor?.exhibitor_or_profile_url), pageSourceUrl),
        company_website: absolutizeUrl(cleanString(sponsor?.company_website), pageSourceUrl),
        source_page_url: pageSourceUrl,
        secondary_source_urls: [],
        tier_evidence_text: cleanString(sponsor?.tier_evidence_text) || null,
        notes: cleanString(sponsor?.notes) || null,
        confidence,
        last_verified_at: "",
      };
      const key = normalizeName(sponsorName);
      const existing = rowsByName.get(key);
      if (!existing) {
        rowsByName.set(key, { row, evidenceUrls: new Set([pageSourceUrl]) });
      } else {
        mergeSponsor(existing, row, pageSourceUrl);
      }
    }
  }
  return [...rowsByName.values()].map(({ row, evidenceUrls }) => ({
    ...row,
    secondary_source_urls: [...evidenceUrls].filter((url) => url !== row.source_page_url),
  }));
}
/**
 * Builds the ordered list of pages to scrape for one event: the homepage,
 * the guessed `likelyPaths`, then pages found through a `site:` search, then
 * sponsor-looking links on the homepage. Only same-host URLs are kept and
 * the list is capped at `maxPagesPerEvent`.
 *
 * The list is append-only and truncated from the front, so once it is full no
 * later stage can change the result. The search and homepage scrape are
 * therefore skipped in that case instead of spending API calls on results
 * that would be sliced off (previously both always ran).
 *
 * @param event Event site whose pages should be collected.
 * @returns Up to `maxPagesPerEvent` normalised, de-duplicated URLs.
 */
async function findCandidatePages(event: EventCandidate): Promise<string[]> {
  const urls: string[] = [];
  const seen = new Set<string>();
  const isFull = (): boolean => urls.length >= maxPagesPerEvent;
  function add(url: string | null | undefined) {
    const normalized = normalizeUrl(url || "");
    if (!normalized || seen.has(normalized)) return;
    if (getHost(normalized) !== event.host) return;
    seen.add(normalized);
    urls.push(normalized);
  }

  add(event.url);
  for (const path of likelyPaths) add(joinUrl(event.url, path));
  if (isFull()) return urls.slice(0, maxPagesPerEvent);

  const searchQuery = `site:${event.host} (${sponsorPageTerms.join(" OR ")}) ${topic || eventType}`;
  try {
    const searchResponse = await firecrawl.search(searchQuery, {
      limit: Math.min(10, Math.max(4, maxPagesPerEvent * 2)),
      integration: "prometheus",
    } as any);
    for (const result of getSearchResults(searchResponse)) {
      const url = normalizeUrl(getResultUrl(result));
      if (!url || getHost(url) !== event.host) continue;
      const text = `${getResultTitle(result)} ${getResultDescription(result)} ${url}`.toLowerCase();
      if (containsSponsorTerm(text)) add(url);
    }
  } catch (err) {
    // Search is best-effort; the guessed paths and homepage links still apply.
    console.error(`Search fallback failed for ${event.host}: ${String(err)}`);
  }
  if (isFull()) return urls.slice(0, maxPagesPerEvent);

  const homepage = await scrapeBasic(event.url);
  if (homepage) {
    for (const link of extractLikelyLinks(homepage.html || "", event.url)) add(link);
    for (const link of Array.isArray(homepage.links) ? homepage.links : []) {
      const absolute = absolutizeUrl(cleanString(link), event.url);
      if (absolute && containsSponsorTerm(absolute.toLowerCase())) add(absolute);
    }
  }
  return urls.slice(0, maxPagesPerEvent);
}
/**
 * Best-effort scrape of a page's HTML and link list (no structured extraction).
 *
 * @param url Page to scrape.
 * @returns The unwrapped Firecrawl document, or `null` when the scrape failed
 *          after retries (the failure is logged to stderr).
 */
async function scrapeBasic(url: string): Promise<any | null> {
  let raw: unknown;
  try {
    raw = await withRetry(() =>
      firecrawl.scrape(url, {
        formats: ["html", "links"],
        onlyMainContent: false,
        waitFor: 1500,
        timeout: 30000,
        integration: "prometheus",
      }),
    );
    return unwrapDocument(raw);
  } catch (failure) {
    console.error(`Basic scrape failed for ${url}: ${String(failure)}`);
    return null;
  }
}
/**
 * Scrapes a page and asks Firecrawl for structured sponsor data (the "json"
 * format with `extractionSchema`), alongside markdown and HTML.
 *
 * @param url Page to scrape.
 * @returns The unwrapped Firecrawl document.
 * @throws Whatever the scrape throws once the retries are exhausted, or the
 *         error raised by unwrapDocument for an unsuccessful response.
 */
async function scrapeForExtraction(url: string): Promise<any> {
  // The active CLI filters are passed along as context for the extractor.
  const filterSummary = `The target filters are topic=${topic || "not specified"}, region=${region}, city=${city || "not specified"}, date_range=${dateRange}, event_type=${eventType}, preferred_language=${language}.`;
  const guidance = [
    "Extract companies that are clearly listed on this official event page as sponsors, partners, exhibitors, supporters, media partners, community partners, startup partners, or similar commercial/event relationships.",
    "Prefer precision over recall. Do not invent names from generic text, attendee examples, navigation menus, testimonials, or unrelated article content.",
    "Infer sponsorship_tier only from nearby section headings or visible labels such as title sponsor, platinum, gold, silver, bronze, diamond, strategic partner, presenting partner, innovation partner, ecosystem partner, exhibitor, startup exhibitor, media partner, or community partner.",
    "Use null when a field is not visibly supported. Include profile URLs or outbound company websites only when present on the page.",
    filterSummary,
  ];
  const prompt = guidance.join(" ");

  const raw = await withRetry(() =>
    firecrawl.scrape(url, {
      formats: [
        "markdown",
        "html",
        {
          type: "json",
          prompt,
          schema: extractionSchema,
        },
      ],
      onlyMainContent: false,
      waitFor: 2500,
      timeout: 45000,
      integration: "prometheus",
    }),
  );
  return unwrapDocument(raw);
}
/**
 * Runs an async operation, trying it at most twice with no delay in between.
 * Each failure is logged to stderr.
 *
 * @param operation Factory that starts the operation; called once per attempt.
 * @returns The result of the first attempt that succeeds.
 * @throws The error of the last attempt when both attempts fail.
 */
async function withRetry<T>(operation: () => Promise<T>): Promise<T> {
  const maxAttempts = 2;
  let failure: unknown;
  let attemptNumber = 1;
  while (attemptNumber <= maxAttempts) {
    try {
      return await operation();
    } catch (err) {
      failure = err;
      console.error(`Attempt ${attemptNumber} failed: ${String(err)}`);
    }
    attemptNumber += 1;
  }
  throw failure;
}
/**
 * Turns the comma-separated `--seed-sites` value into event candidates.
 * Entries that cannot be parsed as a URL are dropped; only the first entry
 * per host is kept, and each candidate points at the site origin.
 *
 * @param input Comma-separated list of URLs or bare hosts.
 * @returns Candidates in input order, with `event_name` left as `null`.
 */
function parseSeedSites(input: string): EventCandidate[] {
  // A Map keeps insertion order, so it doubles as the "seen" set.
  const byHost = new Map<string, EventCandidate>();
  for (const piece of input.split(",")) {
    const normalized = normalizeUrl(piece);
    const origin = normalized ? getOrigin(normalized) : null;
    const host = origin ? getHost(origin) : "";
    if (!origin || !host || byHost.has(host)) continue;
    byHost.set(host, { url: origin, host, event_name: null });
  }
  return [...byHost.values()];
}
/**
 * Removes candidates whose host was already seen, keeping the first
 * occurrence and the original order.
 *
 * @param events Candidates, possibly with repeated hosts.
 * @returns A new array with one candidate per host.
 */
function dedupeEvents(events: EventCandidate[]): EventCandidate[] {
  const seenHosts = new Set<string>();
  return events.filter((candidate) => {
    if (seenHosts.has(candidate.host)) return false;
    seenHosts.add(candidate.host);
    return true;
  });
}
/**
 * Collects the links in an HTML document whose anchor text or resolved URL
 * contains a sponsor-related term.
 *
 * @param html Raw HTML of the page.
 * @param baseUrl URL used to resolve relative hrefs.
 * @returns Absolute URLs in document order (duplicates are not removed).
 */
function extractLikelyLinks(html: string, baseUrl: string): string[] {
  const $ = cheerio.load(html);
  const matches: string[] = [];
  $("a[href]").each((_, node) => {
    const anchor = $(node);
    const target = absolutizeUrl(cleanString(anchor.attr("href")), baseUrl);
    if (!target) return;
    const label = cleanString(anchor.text()).toLowerCase();
    if (containsSponsorTerm(`${label} ${target}`.toLowerCase())) {
      matches.push(target);
    }
  });
  return matches;
}
/**
 * Markup-based fallback used when structured extraction returns no sponsors.
 *
 * Looks at links, images with alt text and aria-labelled elements, derives a
 * company name from their label, and keeps the element only when its
 * surrounding context (nearest heading, container text, page URL) mentions a
 * sponsor or tier term. The first element per normalised name wins.
 *
 * @param html Raw HTML of the page.
 * @param baseUrl URL of the page; used to resolve links and as a context hint.
 * @param eventHost Host of the event site; links to other hosts are treated
 *                  as company websites, same-host links as profile URLs.
 * @returns Sponsor objects shaped like the `sponsors` items of the extraction
 *          schema, each with a fixed confidence of 0.65.
 */
function extractSponsorsFromHtml(html: string, baseUrl: string, eventHost: string): any[] {
  const $ = cheerio.load(html);
  const byKey = new Map<string, any>();
  $("a[href], img[alt], [aria-label]").each((_, node) => {
    const el = $(node);
    const anchor = el.is("a") ? el : el.closest("a[href]");

    // Label sources in priority order; the first non-empty one is used.
    const labelSources = [
      cleanString(el.attr("alt")),
      cleanString(el.attr("aria-label")),
      cleanString(el.attr("title")),
      cleanString(anchor.text()),
      cleanString(el.text()),
    ];
    const name = cleanCompanyName(labelSources.find(Boolean) ?? "");
    if (!isLikelyCompanyName(name)) return;

    const context = getElementContext($, el);
    const hint = `${context} ${baseUrl}`.toLowerCase();
    const looksRelevant = containsSponsorTerm(hint) || containsTierTerm(hint);
    if (!looksRelevant) return;

    const key = normalizeName(name);
    if (!key || byKey.has(key)) return;

    const tier = inferTierFromContext(context);
    const relationship = inferRelationshipType(tier || context) || inferRelationshipFromContext(context);
    const link = absolutizeUrl(cleanString(anchor.attr("href")) || cleanString(el.attr("href")), baseUrl);
    const linkHost = link ? getHost(link) : "";
    const isOutbound = linkHost !== "" && linkHost !== eventHost;

    byKey.set(key, {
      sponsor_name: name,
      sponsorship_tier: tier,
      relationship_type: relationship,
      exhibitor_or_profile_url: link && !isOutbound ? link : null,
      company_website: link && isOutbound ? link : null,
      tier_evidence_text: tier ? nearestHeadingText($, el) || context.slice(0, 180) : null,
      notes: "Extracted from visible sponsor, partner, or exhibitor page markup.",
      confidence: 0.65,
    });
  });
  return [...byKey.values()];
}
/**
 * Regroups flat sponsor rows into one object per event (keyed by `event_url`).
 * Event-level fields are taken from the first row seen for that event; the
 * remaining fields of every row become an entry in `sponsors`.
 *
 * @param rows Flat sponsor rows.
 * @returns Events in order of first appearance, each with at least one sponsor.
 */
function groupByEvent(rows: SponsorRow[]): GroupedEvent[] {
  const byUrl = new Map<string, GroupedEvent>();
  for (const row of rows) {
    const {
      event_name,
      event_url,
      event_location,
      event_city,
      event_region,
      event_date_text,
      event_type,
      topic,
      last_verified_at,
      ...sponsor
    } = row;
    let group = byUrl.get(event_url);
    if (!group) {
      group = {
        event_name,
        event_url,
        event_location,
        event_city,
        event_region,
        event_date_text,
        event_type,
        topic,
        last_verified_at,
        sponsors: [],
      };
      byUrl.set(event_url, group);
    }
    group.sponsors.push(sponsor);
  }
  return Array.from(byUrl.values()).filter((group) => group.sponsors.length > 0);
}
/**
 * Folds a second sighting of a sponsor into the accumulated row, in place.
 *
 * - records the page as additional evidence;
 * - upgrades tier and tier evidence when the incoming tier ranks higher
 *   (see tierStrength);
 * - fills fields that are still empty, never overwriting existing values;
 * - keeps the higher of the two confidence values.
 *
 * @param existing Accumulator for this sponsor (mutated).
 * @param incoming Row extracted from another page.
 * @param evidenceUrl Page the incoming row came from.
 */
function mergeSponsor(existing: SponsorAccumulator, incoming: SponsorRow, evidenceUrl: string) {
  existing.evidenceUrls.add(evidenceUrl);
  const target = existing.row;

  const incomingIsStronger = tierStrength(incoming.sponsorship_tier) > tierStrength(target.sponsorship_tier);
  if (incomingIsStronger) {
    target.sponsorship_tier = incoming.sponsorship_tier;
    target.tier_evidence_text = incoming.tier_evidence_text;
  }

  // Fields that are only filled when the accumulated row has no value yet.
  const fillIfMissing = [
    "relationship_type",
    "exhibitor_or_profile_url",
    "company_website",
    "event_location",
    "event_city",
    "event_region",
    "event_date_text",
    "notes",
  ] as const;
  for (const field of fillIfMissing) {
    if (!target[field] && incoming[field]) target[field] = incoming[field];
  }

  target.confidence = Math.max(target.confidence, incoming.confidence);
}
/**
 * Ranks a sponsorship tier label so that two labels can be compared.
 * Matching is case-insensitive and by substring; the first matching group in
 * the table wins.
 *
 * @param tier Tier label, or `null` when unknown.
 * @returns 0 for a missing/empty label, 10 for an unrecognised one, otherwise
 *          a score between 20 (exhibitor) and 100 (title/presenting).
 */
function tierStrength(tier: string | null): number {
  const label = (tier || "").toLowerCase();
  if (!label) return 0;
  const ranking: Array<[string[], number]> = [
    [["title", "presenting"], 100],
    [["diamond"], 90],
    [["platinum"], 80],
    [["strategic"], 75],
    [["gold"], 70],
    [["silver"], 60],
    [["bronze"], 50],
    [["innovation", "ecosystem"], 45],
    [["media", "community"], 30],
    [["exhibitor"], 20],
  ];
  for (const [keywords, score] of ranking) {
    if (keywords.some((keyword) => label.includes(keyword))) return score;
  }
  return 10;
}
/**
 * Derives a coarse relationship type from a tier label (case-insensitive
 * substring match). "exhibitor" takes precedence over "partner", which takes
 * precedence over "sponsor".
 *
 * @param tier Tier label or other descriptive text; may be empty.
 * @returns "exhibitor", "partner", "sponsor", or `null` when nothing matches.
 */
function inferRelationshipType(tier: string): string | null {
  const label = tier.toLowerCase();
  for (const kind of ["exhibitor", "partner", "sponsor"]) {
    if (label.includes(kind)) return kind;
  }
  return null;
}
/**
 * Pulls the result list out of a search response, whichever shape it has:
 * the response itself, or its `data`, `web` or `results` property (checked in
 * that order).
 *
 * @param response Search response of unknown shape.
 * @returns The first array found, or `[]`.
 */
function getSearchResults(response: any): any[] {
  const shapes = [response, response?.data, response?.web, response?.results];
  return shapes.find((shape) => Array.isArray(shape)) ?? [];
}
/**
 * Normalises a scrape response to the document itself.
 *
 * @param response Scrape response, either the document or a wrapper with `data`.
 * @returns `response.data` when it is truthy, otherwise the response unchanged.
 * @throws Error when the response carries `success === false`; the message is
 *         the response's `error` or a generic fallback.
 */
function unwrapDocument(response: any): any {
  const failed = response?.success === false;
  if (failed) {
    const message = response.error || "Firecrawl scrape returned success=false";
    throw new Error(message);
  }
  const inner = response?.data;
  return inner ? inner : response;
}
/**
 * Reads the URL of a search result, trying `url`, `metadata.sourceURL`,
 * `metadata.url` and `sourceURL` in that order.
 *
 * @param result Search result of unknown shape.
 * @returns The whitespace-normalised URL, or "" when none is present.
 */
function getResultUrl(result: any): string {
  const metadata = result?.metadata;
  const raw = result?.url || metadata?.sourceURL || metadata?.url || result?.sourceURL;
  return cleanString(raw);
}
/**
 * Reads the title of a search result (`title`, then `metadata.title`).
 *
 * @param result Search result of unknown shape.
 * @returns The whitespace-normalised title, or "" when none is present.
 */
function getResultTitle(result: any): string {
  const raw = result?.title || result?.metadata?.title;
  return cleanString(raw);
}
/**
 * Reads descriptive text of a search result, trying `description`,
 * `snippet`, `markdown` and `metadata.description` in that order.
 *
 * @param result Search result of unknown shape.
 * @returns The whitespace-normalised text, or "" when none is present.
 */
function getResultDescription(result: any): string {
  const raw = result?.description || result?.snippet || result?.markdown || result?.metadata?.description;
  return cleanString(raw);
}
/**
 * Tells whether a piece of text mentions an event-like keyword
 * (case-insensitive substring match).
 *
 * @param text Title, description and URL of a search result.
 * @returns `true` when at least one keyword occurs.
 */
function looksLikeEventResult(text: string): boolean {
  const eventKeywords = /(conference|summit|expo|event|trade show|congress|festival|forum|convention)/i;
  return eventKeywords.test(text);
}
/**
 * Tells whether a piece of text contains a sponsor-related term.
 * The match is a case-insensitive substring match, so e.g. "expo" also
 * matches inside longer words.
 *
 * @param text Text or URL to inspect.
 * @returns `true` when at least one term occurs.
 */
function containsSponsorTerm(text: string): boolean {
  const sponsorTerms = /(sponsor|partner|exhibitor|expo|showcase|supporter|media.?partner|startup.?partner|community.?partner|sponsorship)/i;
  return sponsorTerms.test(text);
}
/**
 * Tells whether a piece of text names a sponsorship tier (a tier word
 * followed within 30 characters by sponsor/partner/exhibitor) or contains
 * "exhibitor", "sponsor" or "partner" (optionally plural) as a whole word.
 *
 * @param text Text to inspect.
 * @returns `true` when either pattern matches, case-insensitively.
 */
function containsTierTerm(text: string): boolean {
  const tierPattern = /(title|presenting|diamond|platinum|gold|silver|bronze|strategic|innovation|ecosystem|media|community).{0,30}(sponsor|partner|exhibitor)|\b(exhibitor|sponsor|partner)s?\b/i;
  return tierPattern.test(text);
}
/**
 * Finds the first known tier phrase contained in a piece of context text
 * (case-insensitive). Phrases are checked from most to least specific, so
 * "gold sponsor" is preferred over the bare "sponsor".
 *
 * @param text Context text around a sponsor element.
 * @returns The matched phrase in Title Case, or `null` when none matches.
 */
function inferTierFromContext(text: string): string | null {
  const haystack = text.toLowerCase();
  const knownTiers = [
    "title sponsor",
    "presenting sponsor",
    "diamond sponsor",
    "platinum sponsor",
    "gold sponsor",
    "silver sponsor",
    "bronze sponsor",
    "strategic partner",
    "presenting partner",
    "innovation partner",
    "ecosystem partner",
    "startup exhibitor",
    "media partner",
    "community partner",
    "exhibitor",
    "sponsor",
    "partner",
  ];
  const match = knownTiers.find((tier) => haystack.includes(tier));
  if (match === undefined) return null;
  return match
    .split(" ")
    .map((word) => word.charAt(0).toUpperCase() + word.slice(1))
    .join(" ");
}
/**
 * Derives a coarse relationship type from free context text
 * (case-insensitive substring match; "exhibitor" wins over "partner", which
 * wins over "sponsor").
 *
 * @param text Context text around a sponsor element.
 * @returns "exhibitor", "partner", "sponsor", or `null` when nothing matches.
 */
function inferRelationshipFromContext(text: string): string | null {
  const haystack = text.toLowerCase();
  const kinds = ["exhibitor", "partner", "sponsor"];
  const found = kinds.find((kind) => haystack.includes(kind));
  return found === undefined ? null : found;
}
/**
 * Builds the context text used to judge an element: the nearest preceding
 * heading followed by up to 600 characters of the text of the closest
 * `li`, `article`, `section` or `div` ancestor.
 *
 * @param $ Loaded cheerio document.
 * @param el Element being inspected.
 * @returns Whitespace-normalised context text (may be empty).
 */
function getElementContext($: cheerio.CheerioAPI, el: cheerio.Cheerio<any>): string {
  const heading = nearestHeadingText($, el);
  const container = el.closest("li, article, section, div");
  const containerText = cleanString(container.text()).slice(0, 600);
  return cleanString([heading, containerText].join(" "));
}
/**
 * Finds the closest heading (h1–h6) that precedes an element as a sibling,
 * looking at the element itself and then at up to five ancestors in turn.
 *
 * @param $ Loaded cheerio document (unused, kept for a uniform signature).
 * @param el Element to start from.
 * @returns The whitespace-normalised heading text, or "" when none is found.
 */
function nearestHeadingText($: cheerio.CheerioAPI, el: cheerio.Cheerio<any>): string {
  const headingSelector = "h1,h2,h3,h4,h5,h6";
  // The element plus five ancestors: six levels in total.
  const maxLevels = 6;
  let node = el;
  let level = 0;
  while (level < maxLevels) {
    const heading = node.prevAll(headingSelector).first();
    if (heading.length) return cleanString(heading.text());
    const parent = node.parent();
    if (!parent.length) return "";
    node = parent;
    level += 1;
  }
  return "";
}
/**
 * Turns a raw label (alt text, aria-label, link text) into a company name by
 * removing filler words such as "logo" or "sponsor" and a trailing image file
 * extension.
 *
 * Whitespace is collapsed and trimmed before the extension is stripped.
 * Previously the strip ran first, so a label like "acme.png logo" (which
 * becomes "acme.png " once "logo" is removed) kept its ".png".
 *
 * @param value Raw label text.
 * @returns The cleaned name; may be empty.
 */
function cleanCompanyName(value: string): string {
  return cleanString(value)
    .replace(/\b(logo|sponsor|partner|exhibitor|image|visit website|learn more)\b/gi, " ")
    .replace(/\s+/g, " ")
    .trim()
    .replace(/\.(png|jpg|jpeg|webp|svg)$/i, "")
    .trim();
}
/**
 * Heuristic filter for labels that could be a company name.
 *
 * Rejects labels that are empty, shorter than 2 or longer than 90
 * characters, that are a navigation word (home, menu, tickets, …), that are
 * short (three words or fewer) and contain a sponsor term, or that contain
 * no letter or digit at all.
 *
 * @param value Cleaned label.
 * @returns `true` when the label passes every check.
 */
function isLikelyCompanyName(value: string): boolean {
  const lengthOk = Boolean(value) && value.length >= 2 && value.length <= 90;
  if (!lengthOk) return false;

  const navigationLabel = /^(home|menu|tickets|register|agenda|speakers|sponsors|partners|exhibitors|contact|about|privacy|terms)$/i;
  if (navigationLabel.test(value)) return false;

  // Short labels such as "Gold Sponsors" are section titles, not companies.
  const wordCount = value.split(/\s+/).length;
  const isGenericSponsorLabel = wordCount <= 3 && containsSponsorTerm(value.toLowerCase());
  if (isGenericSponsorLabel) return false;

  return /[a-z0-9]/i.test(value);
}
/**
 * Loose relevance check: does `text` contain at least one word of `needle`?
 * The needle is lower-cased and split on non-alphanumeric characters; words
 * of a single character are ignored. `text` is used as given, so callers
 * should pass it lower-cased.
 *
 * @param text Text to search in.
 * @param needle Phrase whose words are looked for.
 * @returns `true` when a word is found, or when the needle has no
 *          alphanumeric content at all.
 */
function looselyMentions(text: string, needle: string): boolean {
  const words = needle.toLowerCase().replace(/[^a-z0-9]+/g, " ").trim();
  if (words === "") return true;
  for (const word of words.split(/\s+/)) {
    if (word.length > 1 && text.includes(word)) return true;
  }
  return false;
}
/**
 * Tells whether a host belongs to a blocked aggregator/social site.
 *
 * Matching is done on domain boundaries. The previous substring match also
 * blocked unrelated hosts: "box.com" contains "x.com", and "ai-events.com"
 * contains "events.com".
 *
 * - An entry such as "facebook.com" matches that host and its subdomains.
 * - An entry ending in "." such as "eventbrite." matches any host that has
 *   that label as a full label prefix ("eventbrite.co.uk",
 *   "tickets.eventbrite.com").
 *
 * @param host Lower-case host name without a leading "www.".
 * @param blockedHosts Entries to match against; defaults to `aggregatorHosts`.
 * @returns `true` when the host matches one of the entries.
 */
function isAggregatorHost(host: string, blockedHosts: readonly string[] = aggregatorHosts): boolean {
  return blockedHosts.some((blocked) => {
    if (blocked.endsWith(".")) {
      return host.startsWith(blocked) || host.includes(`.${blocked}`);
    }
    return host === blocked || host.endsWith(`.${blocked}`);
  });
}
/**
 * Resolves a path against a base URL.
 *
 * @param base Absolute base URL.
 * @param path Path or relative reference to resolve.
 * @returns The resolved absolute URL, or `null` when it cannot be parsed.
 */
function joinUrl(base: string, path: string): string | null {
  let resolved: URL;
  try {
    resolved = new URL(path, base);
  } catch {
    return null;
  }
  return resolved.toString();
}
/**
 * Resolves a possibly relative link against a base URL and returns it only
 * when it is a web (http/https) URL.
 *
 * `mailto:`, `tel:`, `javascript:` and similar links used to be returned
 * unchanged. They could then end up as a sponsor's profile URL or, for
 * something like `mailto:sponsors@event.com`, be queued as a candidate page.
 *
 * @param value Link as found on the page; "" and "#" are treated as absent.
 * @param base Absolute URL of the page the link was found on.
 * @returns The absolute http(s) URL, or `null` when the link is absent,
 *          unparsable, or uses another scheme.
 */
function absolutizeUrl(value: string, base: string): string | null {
  if (!value || value === "#") return null;
  try {
    const resolved = new URL(value, base);
    if (resolved.protocol !== "http:" && resolved.protocol !== "https:") return null;
    return resolved.toString();
  } catch {
    return null;
  }
}
/**
 * Canonicalises a URL or bare host: adds `https://` when no http(s) scheme is
 * present, drops the fragment, and removes trailing slashes from non-root
 * paths.
 *
 * Inputs with an explicit non-web scheme (`mailto:`, `tel:`, `ftp:`, …) are
 * rejected. Previously they were prefixed with `https://`, so
 * `mailto:sponsors@event.com` became `https://mailto:sponsors@event.com/`,
 * a URL whose host is `event.com` and which passed the same-host checks.
 * A `host:port` input such as `example.com:8080/x` is still accepted.
 *
 * @param value URL, bare host, or host with path.
 * @returns The canonical URL string, or `null` for empty, unparsable or
 *          non-web input.
 */
function normalizeUrl(value: string): string | null {
  const text = cleanString(value);
  if (!text) return null;
  // "<scheme>:" at the start, unless what follows is a bare port number
  // (which means the prefix is a host name, not a scheme).
  const scheme = /^([a-z][a-z0-9+.-]*):(?!\d+(?:[/?#]|$))/i.exec(text)?.[1]?.toLowerCase();
  if (scheme && scheme !== "http" && scheme !== "https") return null;
  try {
    const withProtocol = /^https?:\/\//i.test(text) ? text : `https://${text}`;
    const url = new URL(withProtocol);
    url.hash = "";
    if (url.pathname !== "/" && url.pathname.endsWith("/")) {
      url.pathname = url.pathname.replace(/\/+$/, "");
    }
    return url.toString();
  } catch {
    return null;
  }
}
/**
 * Extracts the origin (scheme, host and port) of an absolute URL.
 *
 * @param value Absolute URL.
 * @returns The origin, or `null` when the value cannot be parsed.
 */
function getOrigin(value: string): string | null {
  let parsed: URL;
  try {
    parsed = new URL(value);
  } catch {
    return null;
  }
  return parsed.origin;
}
/**
 * Extracts the comparable host of an absolute URL: lower-case, without a
 * leading "www.".
 *
 * @param value Absolute URL.
 * @returns The host, or "" when the value cannot be parsed.
 */
function getHost(value: string): string {
  let hostname: string;
  try {
    hostname = new URL(value).hostname;
  } catch {
    return "";
  }
  return hostname.replace(/^www\./, "").toLowerCase();
}
/**
 * Derives a display name from a host when no event name is known: the first
 * label (after dropping a leading "www."), with dashes/underscores turned
 * into spaces and every word capitalised.
 *
 * @param host Host name, e.g. "ai-next.com".
 * @returns Display name, e.g. "Ai Next".
 */
function hostToName(host: string): string {
  const withoutWww = host.replace(/^www\./, "");
  const [firstLabel] = withoutWww.split(".");
  const spaced = firstLabel.replace(/[-_]+/g, " ");
  return spaced.replace(/\b\w/g, (initial) => initial.toUpperCase());
}
/**
 * Converts any value to a single-line string: `null`/`undefined` become "",
 * everything else is stringified, runs of whitespace collapse to one space,
 * and leading/trailing whitespace is removed.
 *
 * @param value Value of any type.
 * @returns The normalised string.
 */
function cleanString(value: unknown): string {
  const text = value === null || value === undefined ? "" : String(value);
  return text
    .split(/\s+/)
    .filter((chunk) => chunk !== "")
    .join(" ");
}
/**
 * Builds the key used to recognise the same sponsor across pages:
 * lower-case, "&" spelled out as "and", every run of other
 * non-alphanumeric characters replaced by one space, trimmed.
 *
 * @param value Sponsor name.
 * @returns The comparison key; "" when the name has no alphanumeric content.
 */
function normalizeName(value: string): string {
  const lowered = value.toLowerCase();
  const ampersandsSpelledOut = lowered.replace(/&/g, "and");
  const alphanumericOnly = ampersandsSpelledOut.replace(/[^a-z0-9]+/g, " ");
  return alphanumericOnly.trim();
}
/**
 * Forces a confidence value into the range [0, 1].
 *
 * @param value Reported confidence.
 * @returns The clamped value, or 0.7 when the input is NaN or infinite.
 */
function clampConfidence(value: number): number {
  const fallback = 0.7;
  if (!Number.isFinite(value)) return fallback;
  const atLeastZero = Math.max(0, value);
  return Math.min(1, atLeastZero);
}
/**
 * Parses a required numeric flag.
 *
 * A missing (falsy) value prints "--<name> is required" to stderr and ends
 * the process with exit code 1.
 *
 * @param value Raw flag value.
 * @param name Flag name without the leading dashes, used in messages.
 * @param max Largest accepted value (inclusive).
 * @returns The parsed integer, between 1 and `max`.
 * @throws Error ("OUT_OF_SCOPE: …") when the value is not a positive integer
 *         or exceeds `max`.
 */
function parsePositiveInteger(value: unknown, name: string, max: number): number {
  if (!value) {
    console.error(`--${name} is required`);
    process.exit(1);
  }
  const parsed = Number(value);
  const isPositiveInteger = Number.isInteger(parsed) && parsed >= 1;
  if (!isPositiveInteger) {
    throw new Error(`OUT_OF_SCOPE: --${name} must be a positive integer`);
  }
  if (parsed > max) {
    throw new Error(`OUT_OF_SCOPE: --${name} must be ${max} or less`);
  }
  return parsed;
}
/**
 * Parses a boolean flag that must be spelled exactly "true" or "false".
 *
 * @param value Raw flag value.
 * @param name Flag name without the leading dashes, used in the message.
 * @returns The parsed boolean.
 * @throws Error ("OUT_OF_SCOPE: …") for any other value.
 */
function parseBooleanFlag(value: string, name: string): boolean {
  switch (value) {
    case "true":
      return true;
    case "false":
      return false;
    default:
      throw new Error(`OUT_OF_SCOPE: --${name} must be true or false`);
  }
}
/** An event website selected for processing (from seed sites or discovery). */
interface EventCandidate {
/** Origin of the event site (scheme and host); used as the row's `event_url`. */
url: string;
/** Host as produced by getHost: lower-case, without a leading "www.". */
host: string;
/** Search-result title when discovered; `null` for seed sites. */
event_name: string | null;
}
/** One sponsor/partner/exhibitor of one event; the unit of the `sponsor_rows` output. */
interface SponsorRow {
/** Extracted event name, else the candidate's name, else a name derived from the host. */
event_name: string;
/** Origin of the event site. */
event_url: string;
event_location: string | null;
/** Extracted city, falling back to the `--city` flag. */
event_city: string | null;
/** Extracted region, falling back to the `--region` flag unless it is "global". */
event_region: string | null;
event_date_text: string | null;
/** Extracted event type, falling back to the `--event-type` flag. */
event_type: string;
/** The `--topic` flag, or `null` when it was not given. */
topic: string | null;
sponsor_name: string;
sponsorship_tier: string | null;
/** Explicit relationship, or one inferred from the tier ("exhibitor", "partner", "sponsor"). */
relationship_type: string | null;
exhibitor_or_profile_url: string | null;
company_website: string | null;
/** Page on which this sponsor was first found. */
source_page_url: string;
/** Other pages of the same event on which this sponsor was also found. */
secondary_source_urls: string[];
tier_evidence_text: string | null;
notes: string | null;
/** Value in [0, 1]; when a sponsor is seen on several pages the highest value is kept. */
confidence: number;
/** Empty while rows are being collected; main() sets it to the run's ISO timestamp. */
last_verified_at: string;
}
/** Working state for one sponsor while the pages of an event are processed. */
interface SponsorAccumulator {
/** Merged row; updated in place by mergeSponsor. */
row: SponsorRow;
/** Every page on which this sponsor was found, including the first one. */
evidenceUrls: Set<string>;
}
/**
 * One event with its sponsors; the unit of the `grouped_by_event` output.
 * The event-level fields are copied from the first row seen for the event
 * (see groupByEvent).
 */
interface GroupedEvent {
event_name: string;
event_url: string;
event_location: string | null;
event_city: string | null;
event_region: string | null;
event_date_text: string | null;
event_type: string;
topic: string | null;
last_verified_at: string;
/** Sponsor rows with the event-level fields removed. */
sponsors: Array<Omit<SponsorRow, "event_name" | "event_url" | "event_location" | "event_city" | "event_region" | "event_date_text" | "event_type" | "topic" | "last_verified_at">>;
}
// Entry point: any failure that escapes main() is printed to stderr and
// reported through a non-zero exit code.
main().catch((failure) => {
  console.error(failure);
  process.exit(1);
});
Deploy this collector to unlock schedules, the API endpoint, and destinations.