ClinicalTrials.gov Trial Monitor
v1 · Published · Official ClinicalTrials.gov study records matching a requested research query, normalized for trial monitoring and sponsor analysis.
Output & API
Preview the latest data, download it, or call this collector as an API.
Marketplace
Publish this collector so others can deploy it — you keep ownership.
Versions
Every build and self-heal appends a version. Pin one to lock runs to it.
import Firecrawl from "@mendable/firecrawl-js";
import { parseArgs } from "node:util";
// The Firecrawl credential is read from the environment; without it the
// script prints a message and exits with status 1 before doing any work.
const apiKey = process.env.FIRECRAWL_API_KEY;
if (apiKey === undefined || apiKey === "") {
  console.error("FIRECRAWL_API_KEY is not set");
  process.exit(1);
}
// Single Firecrawl client instance used for the scrape request in main().
const firecrawl = new Firecrawl({ apiKey });
// Command-line flags for one collector run.
// `strict: true` is passed so that parseArgs rejects flags that are not declared here.
// Every option is declared as a string; main() converts the numeric and boolean
// ones afterwards (parseMaxTrials, parseBoolean) and applies defaults via optionalText.
const { values: flags } = parseArgs({
strict: true,
options: {
// Required by main(): search text and the source identifier.
query: { type: "string" },
source: { type: "string" },
// Required by main(): integer as text, validated by parseMaxTrials.
"max-trials": { type: "string" },
// Required by main(): "trial_rows" or "grouped_by_sponsor".
"output-mode": { type: "string" },
// Optional filters; main() substitutes defaults when they are omitted.
"phase-filter": { type: "string" },
"status-filter": { type: "string" },
"posted-within": { type: "string" },
"country-filter": { type: "string" },
"sponsor-filter": { type: "string" },
// Optional "true"/"false" switches, parsed by parseBoolean.
"include-outcomes": { type: "string" },
"include-locations": { type: "string" },
// Optional label stamped on every output row; main() falls back to the collection timestamp.
"snapshot-label": { type: "string" },
// Optional ordering hint, validated in buildSearchUrl.
"sort-hint": { type: "string" },
},
});
/**
 * Returns the trimmed value of a mandatory CLI flag.
 *
 * @param value Raw flag value, or `undefined` when the flag was not supplied.
 * @param name Flag name without leading dashes; used in the error message.
 * @returns The value with surrounding whitespace removed.
 * @throws Error prefixed with `OUT_OF_SCOPE` when the value is missing or blank.
 */
function requireText(value: string | undefined, name: string): string {
  const trimmed = value === undefined ? "" : value.trim();
  if (trimmed.length === 0) {
    throw new Error(`OUT_OF_SCOPE: --${name} is required`);
  }
  return trimmed;
}
/**
 * Returns the trimmed value of an optional CLI flag, or a fallback.
 *
 * @param value Raw flag value, or `undefined` when the flag was not supplied.
 * @param fallback Value returned when the flag is missing or blank after trimming.
 * @returns The trimmed flag value, or `fallback` unchanged.
 */
function optionalText(value: string | undefined, fallback: string): string {
  const trimmed = (value ?? "").trim();
  if (trimmed.length > 0) {
    return trimmed;
  }
  return fallback;
}
/**
 * Converts a textual flag value to a boolean.
 *
 * Matching ignores surrounding whitespace and letter case; only the words
 * "true" and "false" are accepted.
 *
 * @param value Text to interpret.
 * @param name Flag name without leading dashes; used in the error message.
 * @throws Error prefixed with `OUT_OF_SCOPE` for any other text.
 */
function parseBoolean(value: string, name: string): boolean {
  switch (value.trim().toLowerCase()) {
    case "true":
      return true;
    case "false":
      return false;
    default:
      throw new Error(`OUT_OF_SCOPE: --${name} must be true or false`);
  }
}
/**
 * Splits a comma-separated string into its non-empty, trimmed entries.
 *
 * @param value Comma-separated text; may be empty.
 * @returns Entries in their original order; blank entries are dropped.
 */
function parseList(value: string): string[] {
  return value
    .split(",")
    .map((piece) => piece.trim())
    .filter((piece) => piece !== "");
}
/**
 * Rewrites a status label into upper-case, underscore-separated form.
 *
 * The value is trimmed and upper-cased, then every space and every hyphen is
 * replaced by a single underscore (consecutive separators are not collapsed).
 *
 * @param value Status text such as "not yet recruiting".
 * @returns The rewritten status, e.g. "NOT_YET_RECRUITING".
 */
function normalizeStatus(value: string): string {
  const upper = value.trim().toUpperCase();
  return upper.replace(/[ -]/g, "_");
}
/**
 * Maps a user-supplied phase label onto the phase token used in the
 * `AREA[Phase]` search expression.
 *
 * Matching ignores letter case and the separators whitespace, "-", "_" and
 * "/", so "Early Phase 1", "early-phase-1", "EARLY_PHASE1" and "N/A" are all
 * recognised. Stripping "_" matters because the canonical token
 * "EARLY_PHASE1" itself contains one and must round-trip.
 *
 * @param value Phase label as typed by the caller.
 * @returns One of "EARLY_PHASE1", "PHASE1", "PHASE2", "PHASE3", "PHASE4", "NA".
 * @throws Error prefixed with `OUT_OF_SCOPE` when the label is not recognised.
 */
function normalizePhase(value: string): string {
  // Collapse to a separator-free, upper-case lookup key.
  const compact = value.trim().toUpperCase().replace(/[\s\-_/]/g, "");
  const phaseMap: Record<string, string> = {
    EARLYPHASE1: "EARLY_PHASE1",
    PHASE1: "PHASE1",
    PHASE2: "PHASE2",
    PHASE3: "PHASE3",
    PHASE4: "PHASE4",
    NOTAPPLICABLE: "NA",
    NA: "NA",
  };
  const normalized = phaseMap[compact];
  if (!normalized) {
    throw new Error(`OUT_OF_SCOPE: unsupported phase filter "${value}"`);
  }
  return normalized;
}
/**
 * Parses the `--max-trials` flag.
 *
 * @param value Flag text, converted with `Number()`.
 * @returns The parsed count when it is an integer in the range 1..100.
 * @throws Error prefixed with `OUT_OF_SCOPE` for anything else.
 */
function parseMaxTrials(value: string): number {
  const count = Number(value);
  const withinRange = Number.isInteger(count) && count >= 1 && count <= 100;
  if (withinRange) {
    return count;
  }
  throw new Error("OUT_OF_SCOPE: --max-trials must be an integer from 1 to 100");
}
/**
 * Converts a relative window such as "30 days" or "2 weeks" into the
 * calendar date (UTC, `YYYY-MM-DD`) that lies that far before now.
 *
 * The amount and unit may be separated by any run of whitespace (several
 * spaces, tabs), so "30  days" is handled the same as "30 days". When the
 * unit is omitted, days are assumed. Months count as 30 days and years as
 * 365 days. Tokens after the unit are ignored.
 *
 * @param value Window text: a whole number optionally followed by a unit.
 * @returns ISO date string of the start of the window.
 * @throws Error prefixed with `OUT_OF_SCOPE` when the amount is not a whole
 *   number from 1 to 3650 or the unit is not day(s)/week(s)/month(s)/year(s).
 */
function parsePostedWithin(value: string): string {
  // Split on whitespace runs; a single-space split would turn "30  days" into
  // an empty unit token and reject otherwise valid input.
  const [amountText, unitText] = value.trim().split(/\s+/);
  const amount = Number(amountText);
  const unit = (unitText ?? "days").toLowerCase();
  if (!Number.isInteger(amount) || amount < 1 || amount > 3650) {
    throw new Error("OUT_OF_SCOPE: --posted-within must start with a whole number from 1 to 3650");
  }
  let days: number;
  switch (unit) {
    case "day":
    case "days":
      days = amount;
      break;
    case "week":
    case "weeks":
      days = amount * 7;
      break;
    case "month":
    case "months":
      days = amount * 30;
      break;
    case "year":
    case "years":
      days = amount * 365;
      break;
    default:
      throw new Error("OUT_OF_SCOPE: --posted-within supports days, weeks, months, or years");
  }
  const date = new Date();
  // setUTCDate with a zero or negative day rolls back into earlier months.
  date.setUTCDate(date.getUTCDate() - days);
  return date.toISOString().slice(0, 10);
}
/**
 * Extracts the `date` string from a date-struct object.
 *
 * @param value Object expected to carry a string `date` property; may be null or undefined.
 * @returns The `date` value when it is a string, otherwise `null`.
 */
function formatDateStruct(value: any): string | null {
  const candidate = value?.date;
  if (typeof candidate === "string") {
    return candidate;
  }
  return null;
}
/**
 * Coerces an arbitrary value to an array.
 *
 * @param value Any value.
 * @returns `value` itself (same reference) when it is an array, otherwise a new empty array.
 */
function asArray(value: any): any[] {
  if (Array.isArray(value)) {
    return value;
  }
  return [];
}
/**
 * Collects the non-blank strings of a value that should be an array.
 *
 * Non-array input yields an empty list; non-string and whitespace-only
 * entries are skipped; kept entries are trimmed.
 *
 * @param value Candidate array.
 * @returns Trimmed string entries in their original order.
 */
function stringArray(value: any): string[] {
  return asArray(value)
    .filter((entry: any): entry is string => typeof entry === "string" && entry.trim() !== "")
    .map((entry: string) => entry.trim());
}
/**
 * Converts a list of outcome objects into flat output rows.
 *
 * Each row has `measure`, `description` and `time_frame` (read from the
 * source's `timeFrame`); a field that is not a string becomes `null`.
 *
 * @param value Candidate array of outcome objects; non-array input yields `[]`.
 * @returns One row per input entry, in input order.
 */
function mapOutcomes(value: any): any[] {
  const textOrNull = (field: unknown): string | null => (typeof field === "string" ? field : null);
  return asArray(value).map((outcome: any) => ({
    measure: textOrNull(outcome?.measure),
    description: textOrNull(outcome?.description),
    time_frame: textOrNull(outcome?.timeFrame),
  }));
}
/**
 * Converts a list of intervention objects into flat output rows.
 *
 * Each row has `type`, `name`, `description` (each `null` unless the source
 * field is a string) and `other_names` (the cleaned `otherNames` strings).
 *
 * @param value Candidate array of intervention objects; non-array input yields `[]`.
 * @returns One row per input entry, in input order.
 */
function mapInterventions(value: any): any[] {
  const textOrNull = (field: unknown): string | null => (typeof field === "string" ? field : null);
  return asArray(value).map((intervention: any) => ({
    type: textOrNull(intervention?.type),
    name: textOrNull(intervention?.name),
    description: textOrNull(intervention?.description),
    other_names: stringArray(intervention?.otherNames),
  }));
}
/**
 * Converts a list of location objects into flat output rows.
 *
 * Each row has `facility`, `city`, `state`, `country` and `status`; a field
 * that is not a string in the source becomes `null`.
 *
 * @param value Candidate array of location objects; non-array input yields `[]`.
 * @returns One row per input entry, in input order.
 */
function mapLocations(value: any): any[] {
  const textOrNull = (field: unknown): string | null => (typeof field === "string" ? field : null);
  return asArray(value).map((site: any) => ({
    facility: textOrNull(site?.facility),
    city: textOrNull(site?.city),
    state: textOrNull(site?.state),
    country: textOrNull(site?.country),
    status: textOrNull(site?.status),
  }));
}
/**
 * Appends one URL-encoded `name=value` pair to a list of query-string parts.
 *
 * @param parts Accumulator that is mutated in place.
 * @param name Parameter name; encoded with `encodeURIComponent`.
 * @param value Parameter value; encoded with `encodeURIComponent`.
 */
function addParam(parts: string[], name: string, value: string): void {
  const encodedName = encodeURIComponent(name);
  const encodedValue = encodeURIComponent(value);
  parts.push(encodedName + "=" + encodedValue);
}
/**
 * Builds the ClinicalTrials.gov API v2 `/studies` search URL for one run.
 *
 * Query parameters are emitted in this order: `query.term`, `pageSize`,
 * `format`, `fields`, optional `filter.overallStatus`, `filter.advanced`,
 * optional `query.locn`, optional `query.spons`, optional `sort`.
 *
 * @param input Search settings:
 *   - `query`: free-text search term.
 *   - `maxTrials`: page size requested from the API.
 *   - `phaseFilter`: comma-separated phase labels (may be empty).
 *   - `statusFilter`: comma-separated status labels (may be empty).
 *   - `postedAfter`: lower bound (date text) for the first-posted date.
 *   - `countryFilter` / `sponsorFilter`: optional location / sponsor terms.
 *   - `includeOutcomes` / `includeLocations`: add the matching field groups.
 *   - `sortHint`: "newest", "updated", "recently_updated" or "relevance".
 * @returns The fully encoded request URL.
 * @throws Error prefixed with `OUT_OF_SCOPE` for an unsupported phase label
 *   or an unknown sort hint (phases are checked first).
 */
function buildSearchUrl(input: {
  query: string;
  maxTrials: number;
  phaseFilter: string;
  statusFilter: string;
  postedAfter: string;
  countryFilter: string;
  sponsorFilter: string;
  includeOutcomes: boolean;
  includeLocations: boolean;
  sortHint: string;
}): string {
  const queryParts: string[] = [];
  addParam(queryParts, "query.term", input.query);
  addParam(queryParts, "pageSize", String(input.maxTrials));
  addParam(queryParts, "format", "json");

  // Field groups: the core set is always requested, the others are opt-in.
  const coreFields = [
    "NCTId",
    "OfficialTitle",
    "BriefTitle",
    "BriefSummary",
    "OverallStatus",
    "Phase",
    "StudyType",
    "Condition",
    "InterventionType",
    "InterventionName",
    "InterventionDescription",
    "InterventionOtherName",
    "LeadSponsorName",
    "CollaboratorName",
    "CollaboratorClass",
    "EnrollmentCount",
    "EnrollmentType",
    "StartDate",
    "CompletionDate",
    "PrimaryCompletionDate",
    "StudyFirstPostDate",
    "LastUpdatePostDate",
    "StatusVerifiedDate",
    "WhyStopped",
    "DesignAllocation",
    "DesignInterventionModel",
    "DesignPrimaryPurpose",
    "DesignMasking",
    "VersionHolder",
  ];
  const outcomeFields = input.includeOutcomes
    ? [
        "PrimaryOutcomeMeasure",
        "PrimaryOutcomeDescription",
        "PrimaryOutcomeTimeFrame",
        "SecondaryOutcomeMeasure",
        "SecondaryOutcomeDescription",
        "SecondaryOutcomeTimeFrame",
      ]
    : [];
  const locationFields = input.includeLocations
    ? ["LocationFacility", "LocationCity", "LocationState", "LocationCountry", "LocationStatus"]
    : [];
  const requestedFields = [...coreFields, ...outcomeFields, ...locationFields];
  addParam(queryParts, "fields", requestedFields.join(","));

  if (input.statusFilter) {
    const statuses = parseList(input.statusFilter).map((label) => normalizeStatus(label));
    if (statuses.length > 0) {
      addParam(queryParts, "filter.overallStatus", statuses.join(","));
    }
  }

  // The advanced filter always carries the posted-date range; a phase clause
  // is appended only when at least one phase was requested.
  const clauses = [`AREA[StudyFirstPostDate]RANGE[${input.postedAfter},MAX]`];
  const phases = parseList(input.phaseFilter).map((label) => normalizePhase(label));
  if (phases.length === 1) {
    clauses.push(`AREA[Phase]${phases[0]}`);
  } else if (phases.length > 1) {
    clauses.push(`AREA[Phase](${phases.join(" OR ")})`);
  }
  addParam(queryParts, "filter.advanced", clauses.join(" AND "));

  if (input.countryFilter) {
    addParam(queryParts, "query.locn", input.countryFilter);
  }
  if (input.sponsorFilter) {
    addParam(queryParts, "query.spons", input.sponsorFilter);
  }

  switch (input.sortHint) {
    case "newest":
      addParam(queryParts, "sort", "StudyFirstPostDate:desc");
      break;
    case "updated":
    case "recently_updated":
      addParam(queryParts, "sort", "LastUpdatePostDate:desc");
      break;
    case "relevance":
      // No sort parameter is sent for relevance ordering.
      break;
    default:
      throw new Error("OUT_OF_SCOPE: --sort-hint must be newest, updated, recently_updated, or relevance");
  }

  return `https://clinicaltrials.gov/api/v2/studies?${queryParts.join("&")}`;
}
/**
 * Flattens one study record from the API response into the collector's
 * output row.
 *
 * Missing modules are treated as empty objects, so absent fields become
 * `null` (or empty arrays) instead of raising.
 *
 * @param study Raw study object; fields are read from `protocolSection` and `derivedSection`.
 * @param snapshotLabel Label stamped on the row and used in `tracking_key`.
 * @param collectedAt Collection timestamp stamped on the row.
 * @param includeOutcomes When false, both outcome lists are `null`.
 * @param includeLocations When false, `locations` is `null`.
 * @returns The normalized row.
 * @throws Error when the record has no string `nctId` (or it is empty).
 */
function normalizeStudy(study: any, snapshotLabel: string, collectedAt: string, includeOutcomes: boolean, includeLocations: boolean): any {
  const textOrNull = (field: unknown): string | null => (typeof field === "string" ? field : null);
  const numberOrNull = (field: unknown): number | null => (typeof field === "number" ? field : null);

  const protocol = study?.protocolSection ?? {};
  const derived = study?.derivedSection ?? {};
  const identification = protocol.identificationModule ?? {};
  const status = protocol.statusModule ?? {};
  const sponsors = protocol.sponsorCollaboratorsModule ?? {};
  const description = protocol.descriptionModule ?? {};
  const conditions = protocol.conditionsModule ?? {};
  const design = protocol.designModule ?? {};
  const arms = protocol.armsInterventionsModule ?? {};
  const outcomes = protocol.outcomesModule ?? {};
  const contacts = protocol.contactsLocationsModule ?? {};

  const nctId = textOrNull(identification.nctId);
  if (nctId === null || nctId === "") {
    throw new Error("study record missing nctId");
  }

  const studyUrl = `https://clinicaltrials.gov/study/${nctId}`;
  const sourcePageUrl = `https://clinicaltrials.gov/api/v2/studies/${nctId}`;

  let enrollment: { count: number | null; type: string | null } | null = null;
  if (design.enrollmentInfo) {
    enrollment = {
      count: numberOrNull(design.enrollmentInfo.count),
      type: textOrNull(design.enrollmentInfo.type),
    };
  }

  const collaborators = asArray(sponsors.collaborators).map((partner: any) => ({
    name: textOrNull(partner?.name),
    class: textOrNull(partner?.class),
  }));

  return {
    tracking_key: `${snapshotLabel}:${nctId}`,
    snapshot_label: snapshotLabel,
    collected_at: collectedAt,
    nct_id: nctId,
    // The official title wins; the brief title is the fallback.
    study_title: textOrNull(identification.officialTitle) ?? textOrNull(identification.briefTitle),
    study_url: studyUrl,
    source_page_url: sourcePageUrl,
    brief_summary: textOrNull(description.briefSummary),
    overall_status: textOrNull(status.overallStatus),
    phase: stringArray(design.phases),
    study_type: textOrNull(design.studyType),
    study_design: design.designInfo ?? null,
    conditions: stringArray(conditions.conditions),
    interventions: mapInterventions(arms.interventions),
    sponsor_name: textOrNull(sponsors.leadSponsor?.name),
    collaborators,
    enrollment,
    primary_outcomes: includeOutcomes ? mapOutcomes(outcomes.primaryOutcomes) : null,
    secondary_outcomes: includeOutcomes ? mapOutcomes(outcomes.secondaryOutcomes) : null,
    start_date: formatDateStruct(status.startDateStruct),
    completion_date: formatDateStruct(status.completionDateStruct),
    locations: includeLocations ? mapLocations(contacts.locations) : null,
    confidence: 1,
    notes: {
      source: "ClinicalTrials.gov API v2",
      study_first_posted_date: formatDateStruct(status.studyFirstPostDateStruct),
      last_update_posted_date: formatDateStruct(status.lastUpdatePostDateStruct),
      status_verified_date: textOrNull(status.statusVerifiedDate),
      primary_completion_date: formatDateStruct(status.primaryCompletionDateStruct),
      why_stopped: textOrNull(status.whyStopped),
      version_holder: textOrNull(derived.miscInfoModule?.versionHolder),
      evidence_urls: [studyUrl, sourcePageUrl],
    },
  };
}
/**
 * Groups normalized trial rows by lead sponsor.
 *
 * Rows whose `sponsor_name` is not a non-empty string are grouped under
 * "Unknown sponsor". Groups are returned sorted by sponsor name (default
 * string ordering); rows keep their input order within a group.
 *
 * A `Map` is used instead of a plain object because sponsor names come from
 * external data: names such as "constructor" or "__proto__" would otherwise
 * collide with inherited object properties and break the grouping.
 *
 * @param trials Normalized trial rows.
 * @returns One `{ sponsor_name, trial_count, trials }` entry per sponsor.
 */
function groupBySponsor(trials: any[]): any[] {
  const groups = new Map<string, any[]>();
  for (const trial of trials) {
    const sponsor: string =
      typeof trial.sponsor_name === "string" && trial.sponsor_name ? trial.sponsor_name : "Unknown sponsor";
    const bucket = groups.get(sponsor);
    if (bucket) {
      bucket.push(trial);
    } else {
      groups.set(sponsor, [trial]);
    }
  }
  // Compare by code unit, matching the ordering of a default Array.prototype.sort on strings.
  const ordered = [...groups.entries()].sort(([left], [right]) => (left < right ? -1 : left > right ? 1 : 0));
  return ordered.map(([name, members]) => ({
    sponsor_name: name,
    trial_count: members.length,
    trials: members,
  }));
}
/**
 * Runs one collection: validates the CLI flags, builds the search URL,
 * fetches it through Firecrawl, normalizes the returned studies and writes a
 * single JSON document to stdout.
 *
 * Required flags: `--query`, `--source` (must be "clinicaltrials_gov"),
 * `--max-trials`, `--output-mode` ("trial_rows" or "grouped_by_sponsor").
 * All other flags fall back to the defaults applied below.
 *
 * @throws Error prefixed with `OUT_OF_SCOPE` for invalid flags; Error when
 *   the response is empty or is not valid JSON, or when a study lacks an NCT id.
 */
async function main(): Promise<void> {
  const query = requireText(flags.query, "query");
  const source = requireText(flags.source, "source");
  if (source !== "clinicaltrials_gov") {
    throw new Error('OUT_OF_SCOPE: --source must be "clinicaltrials_gov"');
  }
  const maxTrials = parseMaxTrials(requireText(flags["max-trials"], "max-trials"));
  const outputMode = requireText(flags["output-mode"], "output-mode");
  if (outputMode !== "trial_rows" && outputMode !== "grouped_by_sponsor") {
    throw new Error('OUT_OF_SCOPE: --output-mode must be "trial_rows" or "grouped_by_sponsor"');
  }

  // Optional flags and their defaults.
  const phaseFilter = optionalText(flags["phase-filter"], "");
  const statusFilter = optionalText(flags["status-filter"], "RECRUITING,NOT_YET_RECRUITING");
  const postedWithin = optionalText(flags["posted-within"], "30 days");
  const countryFilter = optionalText(flags["country-filter"], "");
  const sponsorFilter = optionalText(flags["sponsor-filter"], "");
  const includeOutcomes = parseBoolean(optionalText(flags["include-outcomes"], "true"), "include-outcomes");
  const includeLocations = parseBoolean(optionalText(flags["include-locations"], "true"), "include-locations");
  const sortHint = optionalText(flags["sort-hint"], "newest");

  // The snapshot label defaults to the collection timestamp.
  const collectedAt = new Date().toISOString();
  const snapshotLabel = optionalText(flags["snapshot-label"], collectedAt);
  const postedAfter = parsePostedWithin(postedWithin);
  const searchUrl = buildSearchUrl({
    query,
    maxTrials,
    phaseFilter,
    statusFilter,
    postedAfter,
    countryFilter,
    sponsorFilter,
    includeOutcomes,
    includeLocations,
    sortHint,
  });

  // The endpoint returns JSON; the script reads that body from the scrape
  // result's `rawHtml` field.
  const scraped: any = await firecrawl.scrape(searchUrl, {
    formats: ["rawHtml"],
    integration: "prometheus",
  });
  const raw = typeof scraped?.rawHtml === "string" ? scraped.rawHtml : "";
  if (!raw) {
    throw new Error("ClinicalTrials.gov API response did not include raw JSON");
  }

  // Report unparseable payloads with context instead of a bare SyntaxError.
  let parsed: any;
  try {
    parsed = JSON.parse(raw);
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    throw new Error(`ClinicalTrials.gov API response was not valid JSON: ${reason}`);
  }

  // `parsed` may legally be null (JSON "null"); optional chaining keeps that
  // case on the "no studies" path rather than crashing with a TypeError.
  const studies = asArray(parsed?.studies).slice(0, maxTrials);
  const trials: any[] = [];
  for (const study of studies) {
    trials.push(normalizeStudy(study, snapshotLabel, collectedAt, includeOutcomes, includeLocations));
  }

  const base = {
    snapshot_label: snapshotLabel,
    collected_at: collectedAt,
    source,
    query,
    output_mode: outputMode,
    max_trials: maxTrials,
    trial_count: trials.length,
    filters: {
      phase_filter: phaseFilter,
      status_filter: statusFilter,
      posted_within: postedWithin,
      posted_after: postedAfter,
      country_filter: countryFilter,
      sponsor_filter: sponsorFilter,
      include_outcomes: includeOutcomes,
      include_locations: includeLocations,
      sort_hint: sortHint,
    },
    evidence_url: searchUrl,
  };
  const out =
    outputMode === "grouped_by_sponsor"
      ? { ...base, sponsors: groupBySponsor(trials) }
      : { ...base, trials };
  process.stdout.write(JSON.stringify(out));
}
// Entry point: any rejection from main() is printed with console.error and
// the process exits with status 1.
main().catch((failure) => {
  console.error(failure);
  process.exit(1);
});
Deploy this collector to unlock schedules, the API endpoint, and destinations.