DoorDash Cafe Menus collector facts

Publisher: richbray (@richbray).

Version: 1. Last updated: 2026-06-25T09:16:26.891Z.

Run this collector on demand, as an API endpoint, or on a schedule with Firecrawl Prometheus.

Sample fields: venues, name, items, price, category, description, address.

Parameters: url (string[]).

DoorDash Cafe Menus

v1 · Published

A DoorDash cafe menu dataset with venue details and categorized menu items for the sample cafe.

Output & API

Preview the latest data, download it, or call this collector as an API.

Author's sample data
venues
Parameters
--url (string[]): DoorDash store page URLs to collect menus from; repeat this flag once per cafe. If omitted, the collector uses the tested sample cafe URL. Default: ["https://www.doordash.com/store/caffe-in-coffee-co-25001927/"]

Marketplace

Publish this collector so others can deploy it — you keep ownership.

0 subscribers
richbray (@richbray)
0 runs in 14d · published 4d ago

Versions

Every build and self-heal appends a version. Pin one to lock runs to it.

managed by author
v1 · built · approved · current · 4d ago
How this script collects data
import Firecrawl from "@mendable/firecrawl-js";
import * as cheerio from "cheerio";
import { parseArgs } from "node:util";

/** Store pages collected when the caller supplies no --url flag. */
const DEFAULT_URLS = [
  "https://www.doordash.com/store/caffe-in-coffee-co-25001927/",
];

/** Menu section names that collectVenue leaves out of its output. */
const SKIP_SECTIONS = new Set(["Featured Items", "Most Ordered", "Popular Items", "Reviews"]);

// Fail fast: the Firecrawl client below cannot be built without credentials.
const apiKey = process.env.FIRECRAWL_API_KEY;
if (apiKey === undefined || apiKey === "") {
  console.error("FIRECRAWL_API_KEY is not set");
  process.exit(1);
}

// --url is the only declared flag; `multiple` lets it be repeated once per store.
const flags = parseArgs({
  options: { url: { type: "string", multiple: true } },
  strict: true,
}).values;

// Fall back to the built-in sample store when no --url was given.
const urls = flags.url?.length ? flags.url : DEFAULT_URLS;
const firecrawl = new Firecrawl({ apiKey });

/** Any value that `JSON.parse` can produce. */
type JsonValue = null | boolean | number | string | JsonValue[] | { [key: string]: JsonValue };

/**
 * Address portion of the restaurant JSON-LD, as consumed by formatAddress.
 * NOTE(review): field names presumably follow schema.org PostalAddress — confirm
 * against a live page.
 */
type PostalAddress = {
  streetAddress?: string;
  addressLocality?: string;
  addressRegion?: string;
  postalCode?: string;
  addressCountry?: string;
};

/** One menu entry inside a section; every field is optional because the parsed JSON is not validated. */
type MenuItem = {
  "@type"?: string;
  name?: string;
  description?: string;
  offers?: {
    // Accepted as string or number; collectVenue normalizes it to a string via text().
    price?: string | number;
  };
};

/** A named group of menu items; `hasMenuItem` may be a single item or a list. */
type MenuSection = {
  name?: string;
  hasMenuItem?: MenuItem | MenuItem[];
};

/**
 * Shape of the JSON-LD object the collector looks for: a node whose `@type`
 * includes "Restaurant" and that carries a `hasMenu` (see isRestaurantJsonLd).
 */
type RestaurantJsonLd = {
  "@type"?: string | string[];
  name?: string;
  address?: PostalAddress;
  hasMenu?: {
    // May be one section, a list, or a list of lists; flatten() normalizes all three.
    hasMenuSection?: MenuSection | MenuSection[] | MenuSection[][];
  };
};

/**
 * Normalizes a value that may be a single item, an array of items, or arrays
 * nested to any depth into one flat array.
 *
 * Only `null` and `undefined` count as "absent" and are dropped. Other falsy
 * values (`0`, `""`, `false`) are real data and are kept, so the helper is
 * safe for any `T`, not just objects.
 *
 * @param value - A single item, a (possibly nested) array of items, or nothing.
 * @returns A flat array of the items that were present; empty when `value` is absent.
 */
function flatten<T>(value: T | T[] | T[][] | undefined | null): T[] {
  // `== null` matches both null and undefined without discarding 0, "" or false.
  if (value == null) return [];
  if (Array.isArray(value)) return value.flatMap((entry) => flatten(entry as T | T[] | T[][]));
  return [value];
}

/**
 * Converts a loosely-typed value into a trimmed string.
 *
 * @param value - Anything; only strings and numbers are rendered.
 * @returns The trimmed string form of a string or number, otherwise "".
 */
function text(value: unknown): string {
  switch (typeof value) {
    case "string":
      return value.trim();
    case "number":
      return String(value).trim();
    default:
      return "";
  }
}

/**
 * Rejects anything that is not an http(s) DoorDash store page URL.
 *
 * A URL is accepted only when all of the following hold:
 *  - it parses as an absolute URL,
 *  - its scheme is http or https (other schemes such as ftp: cannot be a store page),
 *  - its host is doordash.com or a subdomain of it,
 *  - its path starts with "/store/".
 *
 * @param url - Caller-supplied URL string.
 * @throws Error with an "OUT_OF_SCOPE: ..." message when the URL is unparsable
 *         or does not point at a DoorDash store page.
 */
function validateDoorDashStoreUrl(url: string): void {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    throw new Error("OUT_OF_SCOPE: invalid URL");
  }

  const isWebUrl = parsed.protocol === "https:" || parsed.protocol === "http:";
  // The (^|\.) anchor keeps look-alike hosts such as "evil-doordash.com" out.
  const isDoorDashHost = /(^|\.)doordash\.com$/i.test(parsed.hostname);
  const isStorePath = parsed.pathname.startsWith("/store/");

  if (!isWebUrl || !isDoorDashHost || !isStorePath) {
    throw new Error("OUT_OF_SCOPE: not a DoorDash store URL");
  }
}

/**
 * Type guard for the JSON-LD node the collector needs.
 *
 * @param value - One parsed JSON-LD value.
 * @returns true only for a plain object whose "@type" is (or includes)
 *          "Restaurant" and whose `hasMenu` is truthy.
 */
function isRestaurantJsonLd(value: JsonValue): value is RestaurantJsonLd {
  // Primitives, null and arrays can never be a Restaurant node.
  if (value === null || typeof value !== "object" || Array.isArray(value)) return false;

  const candidate = value as RestaurantJsonLd;
  const declaredType = candidate["@type"];
  const isRestaurant = Array.isArray(declaredType)
    ? declaredType.includes("Restaurant")
    : declaredType === "Restaurant";

  return isRestaurant && Boolean(candidate.hasMenu);
}

/**
 * Locates the first Restaurant JSON-LD node (one accepted by isRestaurantJsonLd)
 * among the page's `<script type="application/ld+json">` blocks.
 *
 * Blocks are visited in document order. A block may hold a single object or a
 * top-level array of objects; empty blocks and blocks that fail to parse are
 * skipped.
 *
 * @param rawHtml - Full HTML of the store page.
 * @returns The first matching Restaurant node.
 * @throws Error when no block contains a matching node.
 */
function findRestaurantJsonLd(rawHtml: string): RestaurantJsonLd {
  const $ = cheerio.load(rawHtml);

  for (const scriptElement of $('script[type="application/ld+json"]').toArray()) {
    const body = $(scriptElement).text().trim();
    if (body === "") continue;

    let parsed: JsonValue;
    try {
      parsed = JSON.parse(body) as JsonValue;
    } catch {
      // Unrelated malformed JSON-LD blocks are ignored on purpose.
      continue;
    }

    const candidates = Array.isArray(parsed) ? parsed : [parsed];
    const match = candidates.find(isRestaurantJsonLd);
    if (match) return match;
  }

  throw new Error("no Restaurant JSON-LD menu found on DoorDash store page");
}

/**
 * Renders a postal address as one comma-separated line:
 * street, then locality/region/postal code, then country.
 * Missing or empty parts are left out.
 *
 * @param address - Address object from the restaurant JSON-LD, if any.
 * @returns The joined address, or "" when there is no address or no parts.
 */
function formatAddress(address: PostalAddress | undefined): string {
  if (!address) return "";

  const { streetAddress, addressLocality, addressRegion, postalCode, addressCountry } = address;

  // Build the locality line first so an empty one drops out with the other blanks.
  const localityLine = [addressLocality, addressRegion, postalCode].filter(Boolean).join(", ");
  const segments = [streetAddress, localityLine, addressCountry].filter((segment) => Boolean(segment));

  return segments.join(", ");
}

/**
 * Scrapes one DoorDash store page and turns its Restaurant JSON-LD into a
 * venue record.
 *
 * Steps: validate the URL, fetch the raw HTML through Firecrawl, find the
 * Restaurant JSON-LD node, then walk its menu sections. Sections without a
 * name or items, and sections listed in SKIP_SECTIONS, are ignored. Items that
 * end up with an empty name or category are dropped.
 *
 * @param url - DoorDash store page URL.
 * @returns `{ name, address, items }`, where each item has `name`,
 *          `description`, `price` and `category` as strings.
 * @throws Error when the URL is out of scope, the scrape returns no raw HTML,
 *         no Restaurant JSON-LD is found, the venue name is missing, or no
 *         menu items remain.
 */
async function collectVenue(url: string) {
  validateDoorDashStoreUrl(url);

  const page = await firecrawl.v1.scrapeUrl(url, {
    formats: ["rawHtml"],
    onlyMainContent: false,
    integration: "prometheus",
    timeout: 30000,
  });

  if (!page.success || !page.rawHtml) {
    throw new Error("DoorDash scrape did not return raw HTML");
  }

  const restaurant = findRestaurantJsonLd(page.rawHtml);

  const items: Array<{ name: string; description: string; price: string; category: string }> = [];
  for (const section of flatten(restaurant.hasMenu?.hasMenuSection)) {
    if (!section.name || !section.hasMenuItem || SKIP_SECTIONS.has(section.name)) continue;

    const category = text(section.name);
    for (const item of flatten(section.hasMenuItem)) {
      const name = text(item.name);
      // Keep only entries that have both a usable name and a usable category.
      if (!name || !category) continue;

      items.push({
        name,
        description: text(item.description),
        price: text(item.offers?.price),
        category,
      });
    }
  }

  if (!restaurant.name) {
    throw new Error("Restaurant JSON-LD menu is missing venue name");
  }
  if (items.length === 0) {
    throw new Error("Restaurant JSON-LD menu contains no menu items");
  }

  const venueName = restaurant.name;
  const venueAddress = formatAddress(restaurant.address);
  return { name: venueName, address: venueAddress, items };
}

/**
 * Collects every requested store one after another and writes the combined
 * result to stdout as a single JSON document of the form `{ "venues": [...] }`.
 * The first failing store rejects the returned promise; nothing is printed then.
 */
async function main() {
  const venues: Awaited<ReturnType<typeof collectVenue>>[] = [];

  for (const storeUrl of urls) {
    const venue = await collectVenue(storeUrl);
    venues.push(venue);
  }

  const payload = JSON.stringify({ venues });
  process.stdout.write(payload);
}

// Entry point: any failure is logged to stderr and becomes exit code 1.
main().catch((failure: unknown) => {
  console.error(failure);
  process.exit(1);
});
deploy to unlock

Deploy this collector to unlock schedules, the API endpoint, and destinations.

One person builds it. Everyone keeps it fresh.