Prometheus

Job Listings Search collector facts

Publisher: sideguide (@sideguide).

Version: 1. Last updated: 2026-06-13T21:54:49.076Z.

Run this collector on demand, as an API endpoint, or on a schedule with Firecrawl Prometheus.

Sample fields: jobs, url, title, salary, company, location, datePosted, count, query, source, retrievedAt.

Parameters: query (string, required), location (string, required), max-results (number).

Job Listings Search

v1 · Published

Find current job listings for a query and location — title, company, location, salary, posting date, and URL. Parameters: query, location.

Output & API

Preview the latest data, download it, or call this collector as an API.

Author's sample data
jobs
count: 50
query: software engineer
source: simplyhired.com
location: San Francisco, CA
retrievedAt: 2026-06-13T21:54:35.647Z
Parameters
--query (string, required): Job title or keywords to search for, e.g. "software engineer".
--location (string, required): City/region to search in, e.g. "San Francisco, CA".
--max-results (number): Maximum number of job listings to return (default 50).

Marketplace

Publish this collector so others can deploy it — you keep ownership.

0 subscribers
sideguide (@sideguide)
0 runs in 14d · published 4h ago

Versions

Every build and self-heal appends a version. Pin one to lock runs to it.

managed by author
v1 · built · approved · current · 4h ago
How this script collects data
import { parseArgs } from "node:util";
import Firecrawl from "@mendable/firecrawl-js";
import * as cheerio from "cheerio";

// ---------------------------------------------------------------------------
// CLI parameters
// ---------------------------------------------------------------------------

// Print a message to stderr and terminate with exit status 1.
function exitWithError(message: string): never {
  console.error(message);
  process.exit(1);
}

// All three flags are read as strings; --max-results is converted below.
const { values } = parseArgs({
  options: {
    query: { type: "string" },
    location: { type: "string" },
    "max-results": { type: "string" },
  },
  strict: true,
});

// Absent flags collapse to "" so the required-parameter checks catch them.
const query = values.query?.trim() ?? "";
const location = values.location?.trim() ?? "";

// Unparsable or zero values fall back to 50; the result is clamped to >= 1.
const requestedMax = Number(values["max-results"] ?? "50");
const maxResults = Math.max(1, requestedMax || 50);

if (query === "") exitWithError("Missing required --query parameter");
if (location === "") exitWithError("Missing required --location parameter");

const apiKey = process.env.FIRECRAWL_API_KEY;
if (!apiKey) exitWithError("Missing FIRECRAWL_API_KEY environment variable");

// Shared Firecrawl client and the site origin used to build every URL.
const firecrawl = new Firecrawl({ apiKey });
const ORIGIN = "https://www.simplyhired.com";

/** One job listing as emitted in the collector's `jobs` output array. */
interface Job {
  /** Listing title; empty string when the source entry has no string title. */
  title: string;
  /** Hiring company name, or null when the source entry has none. */
  company: string | null;
  /** Location text from the listing, or null when absent. */
  location: string | null;
  /** Trimmed salary text, or null when absent or blank. */
  salary: string | null;
  /** Posting date as `YYYY-MM-DD`, or null when no usable timestamp exists. */
  datePosted: string | null;
  /** Absolute link to the listing on simplyhired.com. */
  url: string;
}

// Compose the SimplyHired search URL for the configured query and location.
// Result pages are chained through an opaque `cursor` token that the
// previous page exposes in its embedded state; pass null for the first page.
function buildSearchUrl(cursor: string | null): string {
  const search = new URLSearchParams({ q: query, l: location });
  if (cursor) {
    search.set("cursor", cursor);
  }
  return ORIGIN + "/search?" + search.toString();
}

// SimplyHired ships the full result set as JSON inside the Next.js
// `__NEXT_DATA__` <script> blob, so we parse that rather than scraping
// rendered markup. The blob survives only in rawHtml (html strips <script>).
//
// Returns the listings found on the page plus the cursor of the following
// page (null when the page state lists none). Throws when the blob is
// missing, is not valid JSON, or holds no `props.pageProps.jobs` array.
// Entries that are not objects, or that expose neither a relative `botUrl`
// nor a `jobKey`, are skipped: no real link can be built for them, and
// emitting `/job/undefined` would produce a dead URL that also collapses
// every such entry into one during de-duplication.
function parsePage(rawHtml: string): {
  jobs: Job[];
  nextCursor: string | null;
} {
  const $ = cheerio.load(rawHtml);
  const blob = $("#__NEXT_DATA__").first().html();
  if (!blob) {
    throw new Error(
      "no __NEXT_DATA__ blob found on SimplyHired search page",
    );
  }

  let parsed: any;
  try {
    parsed = JSON.parse(blob);
  } catch {
    throw new Error("failed to JSON.parse __NEXT_DATA__ blob");
  }

  const pageProps = parsed?.props?.pageProps;
  const rawJobs = pageProps?.jobs;
  if (!Array.isArray(rawJobs)) {
    throw new Error("no jobs array in SimplyHired page state");
  }

  const jobs: Job[] = [];
  for (const j of rawJobs) {
    // Guard against null / primitive entries before touching properties.
    if (j === null || typeof j !== "object") continue;

    // botUrl is the clean, share-safe relative path (e.g. /job/<key>);
    // fall back to constructing it from the jobKey when one is present.
    let path: string;
    if (typeof j.botUrl === "string" && j.botUrl.startsWith("/")) {
      path = j.botUrl;
    } else if (
      (typeof j.jobKey === "string" && j.jobKey !== "") ||
      typeof j.jobKey === "number"
    ) {
      path = `/job/${j.jobKey}`;
    } else {
      // Nothing to link to; drop the entry instead of inventing a URL.
      continue;
    }

    // dateOnIndeed is handed to Date as-is (presumably epoch milliseconds —
    // confirm against live data); only the calendar-date part is kept.
    let datePosted: string | null = null;
    if (typeof j.dateOnIndeed === "number" && j.dateOnIndeed > 0) {
      const d = new Date(j.dateOnIndeed);
      if (!Number.isNaN(d.getTime())) {
        datePosted = d.toISOString().slice(0, 10);
      }
    }

    const salary =
      typeof j.salaryInfo === "string" && j.salaryInfo.trim()
        ? j.salaryInfo.trim()
        : null;

    jobs.push({
      title: typeof j.title === "string" ? j.title : "",
      company: typeof j.company === "string" ? j.company : null,
      location: typeof j.location === "string" ? j.location : null,
      salary,
      datePosted,
      url: `${ORIGIN}${path}`,
    });
  }

  // The next page's cursor is keyed by page number in pageCursors.
  const currentPage = Number(pageProps?.currentPageNumber ?? 1);
  const cursors = pageProps?.pageCursors ?? {};
  const nextKey = String(currentPage + 1);
  const nextCursor =
    typeof cursors?.[nextKey] === "string" ? cursors[nextKey] : null;

  return { jobs, nextCursor };
}

// Fetch `url` through Firecrawl, requesting only the rawHtml format, and
// return that HTML. The payload is read from `rawHtml` or, failing that,
// `data.rawHtml` on the response; throws when neither is a non-empty string.
async function scrapeRawHtml(url: string): Promise<string> {
  const response: any = await firecrawl.scrape(url, {
    formats: ["rawHtml"],
    onlyMainContent: false,
    integration: "prometheus",
  });

  const candidate: unknown = response?.rawHtml ?? response?.data?.rawHtml;
  if (typeof candidate === "string" && candidate.length > 0) {
    return candidate;
  }
  throw new Error("SimplyHired scrape returned no rawHtml content");
}

// Walk SimplyHired result pages by following cursors until enough unique
// listings are gathered (or pages run out), then write one JSON document to
// stdout. Per-page progress goes to stderr so stdout stays pure JSON.
async function main() {
  // Keyed by listing URL: keeps the first occurrence, in insertion order.
  const byUrl = new Map<string, Job>();

  // Safety cap on page fetches so a never-ending cursor chain can't loop.
  const pageLimit = Math.min(15, Math.ceil(maxResults / 20) + 1);

  let cursor: string | null = null;
  let pagesFetched = 0;
  while (pagesFetched < pageLimit) {
    const rawHtml = await scrapeRawHtml(buildSearchUrl(cursor));
    const { jobs: pageJobs, nextCursor } = parsePage(rawHtml);

    for (const job of pageJobs) {
      if (!byUrl.has(job.url)) {
        byUrl.set(job.url, job);
      }
    }

    pagesFetched += 1;
    console.error(
      `page ${pagesFetched}: +${pageJobs.length} jobs (total ${byUrl.size})`,
    );

    // Stop once the quota is met or the site offers no further page.
    if (byUrl.size >= maxResults || !nextCursor) break;
    cursor = nextCursor;
  }

  const jobs = Array.from(byUrl.values()).slice(0, maxResults);

  process.stdout.write(
    JSON.stringify({
      source: "simplyhired.com",
      query,
      location,
      retrievedAt: new Date().toISOString(),
      count: jobs.length,
      jobs,
    }),
  );
}

// Entry point: any failure is reported on stderr and ends with exit code 1.
main().catch((failure: unknown) => {
  const message = failure instanceof Error ? failure.message : String(failure);
  console.error(message);
  process.exit(1);
});
deploy to unlock

Deploy this collector to unlock schedules, the API endpoint, and destinations.

One person builds it. Everyone keeps it fresh.
Job Listings Search Data Collector | Firecrawl Prometheus