Government Procurement Opportunities
v1 · Published — Active government procurement opportunities from the supplied official portal pages, returned as a simple bid table.
Output & API
Preview the latest data, download it, or call this collector as an API.
Marketplace
Publish this collector so others can deploy it — you keep ownership.
Versions
Every build and self-heal appends a version. Pin one to lock runs to it.
import Firecrawl from "@mendable/firecrawl-js";
import { z } from "zod";
import { parseArgs } from "node:util";
// ---------------------------------------------------------------------------
// Process-level setup: credentials, CLI flags, and input validation.
// Every failure here prints a message to stderr and exits with status 1
// before any network call is made.
// ---------------------------------------------------------------------------

// The Firecrawl client cannot be built without a key.
const apiKey = process.env.FIRECRAWL_API_KEY;
if (!apiKey) {
  console.error("FIRECRAWL_API_KEY is not set");
  process.exit(1);
}
const firecrawl = new Firecrawl({ apiKey });

// All three flags arrive as strings; strict mode makes unknown flags throw.
const { values: flags } = parseArgs({
  options: {
    "seed-urls": { type: "string" },
    "max-items": { type: "string" },
    query: { type: "string" },
  },
  strict: true,
});

// A missing flag is treated as an empty string so each validator sees a string.
const seedUrls = splitCsv(flags["seed-urls"] ?? "");
const query = clean(flags.query ?? "");
// parseMaxItems exits the process itself when the value is out of range.
const maxItems = parseMaxItems(flags["max-items"] ?? "");

if (seedUrls.length < 1) {
  console.error("OUT_OF_SCOPE: --seed-urls is required and must contain at least one URL");
  process.exit(1);
}
if (query === "") {
  console.error("OUT_OF_SCOPE: --query is required");
  process.exit(1);
}

// Entries that are not absolute http(s) URLs normalize to null; any null aborts the run.
const normalizedSeedUrls = seedUrls.map((url) => normalizeHttpUrl(url, null));
if (normalizedSeedUrls.some((candidate) => !candidate)) {
  console.error("OUT_OF_SCOPE: every --seed-urls entry must be a valid http or https URL");
  process.exit(1);
}
/**
 * Builds a fresh "string, null, or absent" field schema.
 * A factory is used so that every field gets its own schema instance.
 */
const optionalText = () => z.string().nullable().optional();

/** Shape of a single opportunity as requested from the JSON extraction. */
const OpportunityItemSchema = z.object({
  portal_name: optionalText(),
  opportunity_title: optionalText(),
  buyer_name: optionalText(),
  location: optionalText(),
  posted_date: optionalText(),
  closing_date: optionalText(),
  status: optionalText(),
  opportunity_url: optionalText(),
  source_page_url: optionalText(),
});

/**
 * Top-level extraction payload: an `opportunities` array that defaults to
 * empty when the key is missing.
 */
const OpportunitySchema = z.object({
  opportunities: z.array(OpportunityItemSchema).default([]),
});
/** Parsed extraction payload, as typed by the schema. */
type ExtractionPayload = z.infer<typeof OpportunitySchema>;

/** One raw opportunity entry as it comes out of schema parsing. */
type ExtractedOpportunity = ExtractionPayload["opportunities"][number];

/** Column names of the output table. */
type OpportunityField =
  | "portal_name"
  | "opportunity_title"
  | "buyer_name"
  | "location"
  | "posted_date"
  | "closing_date"
  | "status"
  | "opportunity_url"
  | "source_page_url";

/** One output row: every column is present and is either text or null. */
type OpportunityRow = Record<OpportunityField, string | null>;
/**
 * Runs the collector end to end: discovers candidate pages, extracts
 * opportunities from each page in turn, keeps open and not-yet-seen rows, and
 * writes the resulting array to stdout as a single JSON document.
 *
 * Stops scraping as soon as `maxItems` rows have been collected.
 * Throws when discovery yields no candidate pages.
 */
async function main() {
  const pages = await discoverCandidateUrls(normalizedSeedUrls as string[]);
  if (pages.length === 0) {
    throw new Error("no candidate procurement pages found from the supplied seed URLs");
  }

  const rows: OpportunityRow[] = [];
  const seenKeys = new Set<string>();
  const limitReached = (): boolean => rows.length >= maxItems;

  for (const pageUrl of pages) {
    // Checked before each scrape so no page is fetched once the quota is met.
    if (limitReached()) break;
    console.error(`Extracting opportunities from ${pageUrl}`);

    for (const candidate of await extractOpportunities(pageUrl)) {
      const row = normalizeOpportunity(candidate, pageUrl);
      if (row === null || !isOpenOpportunity(row)) continue;

      const key = dedupeKey(row);
      if (seenKeys.has(key)) continue;
      seenKeys.add(key);

      rows.push(row);
      if (limitReached()) break;
    }
  }

  // Progress messages go to stderr; stdout carries only the JSON result.
  process.stdout.write(JSON.stringify(rows));
}
/**
 * Builds the ordered list of pages to scrape.
 *
 * Every supplied seed URL comes first, followed by same-host,
 * procurement-looking pages found through a `site:`-scoped search for each
 * seed. Seeds are deliberately placed ahead of all search hits so that the
 * final cap trims discovered pages before it trims pages the caller
 * explicitly supplied.
 *
 * @param seeds Normalized http(s) seed URLs.
 * @returns De-duplicated candidate URLs, at most min(12, max(3, maxItems * 2)).
 */
async function discoverCandidateUrls(seeds: string[]): Promise<string[]> {
  const discovered: string[] = [];

  for (const seed of seeds) {
    const host = hostFromUrl(seed);
    if (!host) continue;

    const searchQuery = `site:${host} ${query} open procurement opportunity tender bid solicitation contract`;
    console.error(`Searching portal domain: ${searchQuery}`);
    const result = await firecrawl.search(searchQuery, {
      limit: Math.min(5, Math.max(2, maxItems)),
      integration: "prometheus",
    });

    // The hit list is read from `web` when that is an array, otherwise from `data`.
    const searchItems = Array.isArray((result as any).web)
      ? (result as any).web
      : Array.isArray((result as any).data)
        ? (result as any).data
        : [];

    for (const item of searchItems) {
      const url = normalizeHttpUrl(item?.url ?? item?.metadata?.sourceURL, seed);
      if (!url) continue;
      // Stay on the seed's own host and skip pages that do not look procurement-related.
      if (hostFromUrl(url) !== host) continue;
      if (!looksLikeProcurementUrl(url)) continue;
      discovered.push(url);
    }
  }

  const cap = Math.min(12, Math.max(3, maxItems * 2));
  // Seeds first: the slice below must drop search hits before it drops a seed.
  return unique([...seeds, ...discovered]).slice(0, cap);
}
/**
 * Scrapes one page with a JSON-extraction prompt and returns the raw
 * opportunity entries found on it.
 *
 * @param sourcePageUrl Page to scrape.
 * @returns The parsed `opportunities` array, or an empty array (with a message
 *   on stderr) when the returned JSON does not satisfy `OpportunitySchema`.
 */
async function extractOpportunities(sourcePageUrl: string): Promise<ExtractedOpportunity[]> {
  // Instruction sentences are joined with single spaces into one prompt string.
  const promptParts = [
    "Extract active or currently open government procurement opportunities from this official procurement page.",
    `Only include opportunities matching this keyword or theme: ${query}.`,
    "Return one row per active bid, tender, solicitation, RFP, RFQ, contract notice, or opportunity.",
    "Ignore closed, awarded, expired, archived, cancelled, or withdrawn notices when that status is visible.",
    "Do not invent dates, buyer names, status, locations, URLs, or portal names.",
    "Use null for fields that are not visible.",
    "For source_page_url, use the page URL where the row was found.",
  ];
  const prompt = promptParts.join(" ");

  const doc = await firecrawl.scrape(sourcePageUrl, {
    formats: [{
      type: "json",
      prompt,
      schema: OpportunitySchema,
    }],
    onlyMainContent: true,
    waitFor: 2000,
    timeout: 60000,
    integration: "prometheus",
  });

  // Validate locally before trusting the extracted JSON.
  const outcome = OpportunitySchema.safeParse((doc as any).json);
  if (outcome.success) {
    return outcome.data.opportunities;
  }
  console.error(`Skipping ${sourcePageUrl}: extracted JSON did not match the expected shape`);
  return [];
}
/**
 * Converts a raw extracted entry into an output row.
 *
 * URLs are resolved to absolute http(s) form, text fields are
 * whitespace-normalized, and blanks become null.
 *
 * @param item Raw entry from extraction.
 * @param fallbackSourceUrl Page the entry was scraped from; used when the
 *   entry carries no usable `source_page_url` and as the base for resolving it.
 * @returns The row, or null when the entry has neither a title nor a URL.
 */
function normalizeOpportunity(item: ExtractedOpportunity, fallbackSourceUrl: string): OpportunityRow | null {
  const resolvedSource = normalizeHttpUrl(item.source_page_url, fallbackSourceUrl);
  const sourcePageUrl = resolvedSource ?? fallbackSourceUrl;
  // Relative opportunity links are resolved against the page they were found on.
  const opportunityUrl = normalizeHttpUrl(item.opportunity_url, sourcePageUrl);
  const title = nullableText(item.opportunity_title);
  const buyerName = nullableText(item.buyer_name);

  const hasIdentity = Boolean(title) || Boolean(opportunityUrl);
  if (!hasIdentity) {
    return null;
  }

  const row: OpportunityRow = {
    portal_name: nullableText(item.portal_name) ?? portalNameFromUrl(sourcePageUrl),
    opportunity_title: title,
    buyer_name: buyerName,
    location: nullableText(item.location),
    posted_date: nullableText(item.posted_date),
    closing_date: nullableText(item.closing_date),
    status: nullableText(item.status),
    opportunity_url: opportunityUrl,
    source_page_url: sourcePageUrl,
  };
  return row;
}
/**
 * Decides whether a row should be reported as currently open.
 *
 * Rules, applied in order:
 *  1. A status containing a closed-type word rejects the row.
 *  2. A parseable closing date earlier than the start of today (UTC) rejects it.
 *  3. A blank status accepts it.
 *  4. Otherwise it is accepted when a closing date was parseable or the status
 *     contains an open-type word.
 */
function isOpenOpportunity(row: OpportunityRow): boolean {
  const status = clean(row.status ?? "").toLowerCase();

  const closedWords = /\b(closed|expired|awarded|award notice|archived|cancelled|canceled|withdrawn|inactive)\b/;
  if (closedWords.test(status)) {
    return false;
  }

  const closesAt = parseVisibleDate(row.closing_date);
  if (closesAt !== null && closesAt.getTime() < startOfTodayUtc().getTime()) {
    return false;
  }

  if (status === "") {
    return true;
  }
  // At this point any parsed closing date is today or later.
  if (closesAt !== null) {
    return true;
  }
  const openWords = /\b(open|active|current|published|accepting|live|opportunity)\b/;
  return openWords.test(status);
}
/**
 * Builds the key used to detect duplicate rows.
 *
 * The key is `<portal>:<notice>`. The portal part is the slugged portal name,
 * falling back to the source page host. The notice part is the opportunity URL
 * passed through `stripTracking` when a URL exists, otherwise the slugged
 * title, buyer, and closing date joined with "|".
 */
function dedupeKey(row: OpportunityRow): string {
  const portalSource = row.portal_name ?? hostFromUrl(row.source_page_url ?? "") ?? "";
  const portal = slug(portalSource);

  let notice: string;
  if (row.opportunity_url) {
    notice = stripTracking(row.opportunity_url);
  } else {
    const identifyingFields = [row.opportunity_title, row.buyer_name, row.closing_date];
    notice = identifyingFields.map((field) => slug(field ?? "")).join("|");
  }

  return `${portal}:${notice}`;
}
/**
 * Splits a comma-separated string into whitespace-normalized entries,
 * dropping entries that are empty after normalization.
 */
function splitCsv(value: string): string[] {
  const entries: string[] = [];
  for (const piece of value.split(",")) {
    const normalized = clean(piece);
    if (normalized) {
      entries.push(normalized);
    }
  }
  return entries;
}
/**
 * Parses the `--max-items` flag.
 *
 * The value is converted with `Number`, so an empty string becomes 0 and is
 * rejected. Anything that is not an integer in [1, 100] prints an error to
 * stderr and exits the process with status 1.
 *
 * @param value Raw flag text.
 * @returns The parsed item limit.
 */
function parseMaxItems(value: string): number {
  const count = Number(value);
  const isValid = Number.isInteger(count) && count >= 1 && count <= 100;
  if (!isValid) {
    console.error("OUT_OF_SCOPE: --max-items must be an integer from 1 to 100");
    process.exit(1);
  }
  return count;
}
/**
 * Turns an arbitrary value into an absolute http(s) URL string with the
 * fragment removed.
 *
 * @param value Candidate URL; stringified and whitespace-normalized first.
 * @param baseUrl Base for resolving relative URLs, or null to require an
 *   absolute URL.
 * @returns The normalized URL, or null when the value is blank, cannot be
 *   parsed, or uses a scheme other than http/https.
 */
function normalizeHttpUrl(value: unknown, baseUrl: string | null): string | null {
  const raw = clean(String(value ?? ""));
  if (raw === "") {
    return null;
  }
  try {
    const resolved = baseUrl ? new URL(raw, baseUrl) : new URL(raw);
    const isWebScheme = resolved.protocol === "http:" || resolved.protocol === "https:";
    if (!isWebScheme) {
      return null;
    }
    // Drop the fragment so URLs differing only by "#..." compare equal.
    resolved.hash = "";
    return resolved.href;
  } catch {
    return null;
  }
}
/**
 * Extracts the lower-cased hostname of a URL, without a leading "www.".
 *
 * @param value Absolute URL string.
 * @returns The hostname, or null when the value cannot be parsed as a URL.
 */
function hostFromUrl(value: string): string | null {
  let hostname: string;
  try {
    hostname = new URL(value).hostname;
  } catch {
    return null;
  }
  // Only a lower-case "www." prefix is removed, and only at the very start.
  const withoutWww = hostname.startsWith("www.") ? hostname.slice("www.".length) : hostname;
  return withoutWww.toLowerCase();
}
/**
 * Heuristically decides whether a URL points at procurement content by
 * looking for a procurement keyword anywhere in the URL text.
 *
 * A keyword counts only when it is not glued to other letters or digits, so
 * "open-tenders", "current_bids" and "/opportunities/" match while
 * "forbidden" does not. Singular and plural forms are both recognized.
 *
 * @param value URL to inspect.
 * @returns true when any keyword is present as a standalone token.
 */
function looksLikeProcurementUrl(value: string): boolean {
  const text = value.toLowerCase();
  // Letter/digit lookarounds replace `\b`: `\b` treats "_" as part of a word,
  // and a bare "opportunit" stem followed by `\b` can never match
  // "opportunity" or "opportunities".
  const keyword =
    /(?<![a-z0-9])(procurements?|tenders?|bids?|rfps?|rfqs?|solicitations?|contracts?|notices?|opportunit(?:y|ies)?|opps?|suppliers?|vendors?)(?![a-z0-9])/;
  return keyword.test(text);
}
/**
 * Derives a display name for the portal hosting a URL.
 *
 * Known portal domains map to their conventional names; any other host is
 * returned unchanged. A host matches a known domain only when it equals the
 * domain or is a subdomain of it, so look-alike hosts such as
 * "sam.gov.example.com" or "etenders.gov.au" are not mislabelled.
 *
 * @param value Absolute URL string.
 * @returns The portal name, the bare host for unknown portals, or null when
 *   the URL cannot be parsed.
 */
function portalNameFromUrl(value: string): string | null {
  const host = hostFromUrl(value);
  if (!host) return null;

  const knownPortals: ReadonlyArray<readonly [string, string]> = [
    ["sam.gov", "SAM.gov"],
    ["contractsfinder.service.gov.uk", "Contracts Finder"],
    ["find-tender.service.gov.uk", "Find a Tender"],
    ["ted.europa.eu", "Tenders Electronic Daily"],
    ["canadabuys.canada.ca", "CanadaBuys"],
    ["tenders.gov.au", "AusTender"],
    ["gets.govt.nz", "GETS"],
    ["gebiz.gov.sg", "GeBIZ"],
  ];

  for (const [domain, label] of knownPortals) {
    // Exact host or dot-delimited subdomain only; substring matching is too loose.
    if (host === domain || host.endsWith(`.${domain}`)) {
      return label;
    }
  }
  return host;
}
/**
 * Stringifies a value and normalizes its whitespace.
 *
 * @returns The normalized text, or null when the value is null, undefined,
 *   or blank.
 */
function nullableText(value: unknown): string | null {
  const normalized = clean(String(value ?? ""));
  return normalized === "" ? null : normalized;
}
/**
 * Normalizes whitespace: strips it from both ends and collapses every inner
 * run of whitespace into a single space.
 */
function clean(value: string): string {
  const words = value.trim().split(/\s+/);
  return words.join(" ");
}
/**
 * Whitespace-normalizes the given strings, drops blanks, and removes
 * duplicates while keeping first-seen order.
 */
function unique(values: string[]): string[] {
  const seen = new Set<string>();
  for (const value of values) {
    const normalized = clean(value);
    if (normalized) {
      seen.add(normalized);
    }
  }
  return [...seen];
}
/**
 * Reduces text to a lower-case, dash-separated token of at most 180
 * characters, for use inside dedupe keys.
 *
 * The first "http://" or "https://" occurrence is removed, every run of
 * characters outside a-z/0-9 becomes one dash, and a dash at either end is
 * trimmed before the length cut.
 */
function slug(value: string): string {
  const lowered = clean(value).toLowerCase();
  const withoutScheme = lowered.replace(/https?:\/\//, "");
  const dashed = withoutScheme.replace(/[^a-z0-9]+/g, "-");
  const trimmed = dashed.replace(/^-|-$/g, "");
  return trimmed.slice(0, 180);
}
/**
 * Reduces an opportunity URL to a stable form for duplicate detection.
 *
 * When the URL carries one of the recognized notice-id parameters, only those
 * parameters are kept. Otherwise every query parameter except well-known
 * tracking ones (utm_*, gclid, fbclid, ...) is kept, sorted by name, so that
 * two notices distinguished only by an unrecognized parameter (for example
 * "?ref=1" and "?ref=2") do not collapse into the same key.
 *
 * @param value URL to reduce.
 * @returns origin + path + retained query, or `slug(value)` when the value
 *   cannot be parsed as a URL.
 */
function stripTracking(value: string): string {
  try {
    const url = new URL(value);
    const keep = new URL(url.origin + url.pathname);

    const stableParams = ["id", "noticeId", "notice-id", "opp", "opportunityId", "opportunity-id"];
    let foundStableParam = false;
    for (const name of stableParams) {
      const param = url.searchParams.get(name);
      if (param) {
        keep.searchParams.set(name, param);
        foundStableParam = true;
      }
    }

    if (!foundStableParam) {
      // No recognized id: keep whatever still identifies the page, minus trackers.
      const trackingParam = /^(utm_[a-z0-9_]*|fbclid|gclid|dclid|msclkid|mc_cid|mc_eid|_ga|_gl|igshid)$/i;
      const retained = Array.from(url.searchParams.entries())
        .filter(([name, paramValue]) => paramValue !== "" && !trackingParam.test(name))
        // Sorted so the key does not depend on the order parameters appear in.
        .sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0));
      for (const [name, paramValue] of retained) {
        keep.searchParams.append(name, paramValue);
      }
    }

    return keep.toString();
  } catch {
    return slug(value);
  }
}
/**
 * Best-effort parse of a human-readable date string.
 *
 * Ordinal suffixes ("1st", "22nd") and the standalone word "at" are removed
 * before the text is handed to `Date.parse`.
 *
 * @param value Date text as shown on the page, or null.
 * @returns The parsed Date, or null when the text is blank or unparseable.
 */
function parseVisibleDate(value: string | null): Date | null {
  const raw = clean(value ?? "");
  if (raw === "") {
    return null;
  }
  const ordinalSuffix = /\b(\d{1,2})(st|nd|rd|th)\b/gi;
  const atWord = /\bat\b/gi;
  const candidate = raw.replace(ordinalSuffix, "$1").replace(atWord, " ");
  const epochMs = Date.parse(candidate);
  return Number.isNaN(epochMs) ? null : new Date(epochMs);
}
/** Returns the instant 00:00:00.000 UTC of the current UTC calendar day. */
function startOfTodayUtc(): Date {
  const midnight = new Date();
  midnight.setUTCHours(0, 0, 0, 0);
  return midnight;
}
// Entry point: any rejection from main() is printed to stderr and the process
// exits with status 1.
main().catch((failure) => {
  console.error(failure);
  process.exit(1);
});
Deploy this collector to unlock schedules, the API endpoint, and destinations.