@cyanheads/pubchem-mcp-server

v0.1.20 pre-1.0

Search PubChem for chemical compounds, properties, safety, bioactivity, and cross-references.

@cyanheads/pubchem-mcp-server
claude mcp add --transport http pubchem-mcp-server https://pubchem.caseyjhand.com/mcp
codex mcp add pubchem-mcp-server --url https://pubchem.caseyjhand.com/mcp
{
  "mcpServers": {
    "pubchem-mcp-server": {
      "url": "https://pubchem.caseyjhand.com/mcp"
    }
  }
}
gemini mcp add --transport http pubchem-mcp-server https://pubchem.caseyjhand.com/mcp
{
  "mcpServers": {
    "pubchem-mcp-server": {
      "command": "bunx",
      "args": [
        "@cyanheads/pubchem-mcp-server@latest"
      ]
    }
  }
}
{
  "mcpServers": {
    "pubchem-mcp-server": {
      "type": "http",
      "url": "https://pubchem.caseyjhand.com/mcp"
    }
  }
}
# Send an MCP "initialize" request over Streamable HTTP.
# The transport requires the client to advertise that it accepts both a plain
# JSON response and an SSE stream; curl's default "Accept: */*" is rejected
# (406) by servers that enforce this.
curl -X POST https://pubchem.caseyjhand.com/mcp \
  -H "Content-Type: application/json" \
  -H "Accept: application/json, text/event-stream" \
  -H "MCP-Protocol-Version: 2025-11-25" \
  -d '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2025-11-25","capabilities":{},"clientInfo":{"name":"curl","version":"1.0.0"}}}'

Tools

8

pubchem_search_compounds

open-world

Search PubChem for chemical compounds by identifier (name, SMILES, or InChIKey, batched up to 25), molecular formula in Hill notation, substructure or superstructure containment, or 2D Tanimoto similarity. Optionally hydrate results with properties to avoid a follow-up pubchem_get_compound_details call.

read
invocation
{
  "jsonrpc": "2.0",
  "id": 1,
  "method": "tools/call",
  "params": {
    "name": "pubchem_search_compounds",
    "arguments": {
      "searchType": "<searchType>"
    }
  }
}
schema
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "type": "object",
  "properties": {
    "searchType": {
      "type": "string",
      "enum": [
        "identifier",
        "formula",
        "substructure",
        "superstructure",
        "similarity"
      ],
      "description": "Search strategy. \"identifier\": name/SMILES/InChIKey lookup. \"formula\": molecular formula. \"substructure\": find compounds containing the query as a substructure. \"superstructure\": find compounds that are themselves substructures of the query. \"similarity\": 2D Tanimoto similarity to the query."
    },
    "identifierType": {
      "description": "Required for identifier search. Type of chemical identifier: \"name\", \"smiles\", or \"inchikey\".",
      "type": "string",
      "enum": [
        "name",
        "smiles",
        "inchikey"
      ]
    },
    "identifiers": {
      "description": "Required for identifier search. Array of identifiers to resolve (1-25). Examples: [\"aspirin\", \"ibuprofen\"] for name, [\"CC(=O)OC1=CC=CC=C1C(=O)O\"] for SMILES, [\"BSYNRYMUTXBXSQ-UHFFFAOYSA-N\"] for inchikey (27-char block format).",
      "minItems": 1,
      "maxItems": 25,
      "type": "array",
      "items": {
        "type": "string"
      }
    },
    "formula": {
      "description": "Required for formula search. Molecular formula in Hill notation (e.g. \"C6H12O6\", \"CaH2O2\").",
      "type": "string"
    },
    "allowOtherElements": {
      "default": false,
      "description": "Formula search only. When true, includes compounds with additional elements beyond the formula.",
      "type": "boolean"
    },
    "query": {
      "description": "Required for substructure/superstructure/similarity searches. A SMILES string (e.g. \"CC(=O)O\") or PubChem CID as a string (e.g. \"2244\").",
      "type": "string"
    },
    "queryType": {
      "description": "Required for substructure/superstructure/similarity searches. Format of the query: \"smiles\" or \"cid\".",
      "type": "string",
      "enum": [
        "smiles",
        "cid"
      ]
    },
    "threshold": {
      "default": 90,
      "description": "Similarity search only. Minimum Tanimoto similarity (70-100). 90+ for close analogs, 70-80 for scaffold hops. Default: 90.",
      "type": "number",
      "minimum": 70,
      "maximum": 100
    },
    "maxResults": {
      "default": 20,
      "description": "Maximum CIDs to return (1-200). Default: 20.",
      "type": "number",
      "minimum": 1,
      "maximum": 200
    },
    "properties": {
      "description": "Optional: fetch these properties for each result, avoiding a follow-up details call. E.g. [\"MolecularFormula\", \"MolecularWeight\", \"CanonicalSMILES\"].",
      "type": "array",
      "items": {
        "type": "string",
        "enum": [
          "MolecularFormula",
          "MolecularWeight",
          "CanonicalSMILES",
          "IsomericSMILES",
          "InChI",
          "InChIKey",
          "IUPACName",
          "Title",
          "XLogP",
          "ExactMass",
          "MonoisotopicMass",
          "TPSA",
          "Complexity",
          "Charge",
          "HBondDonorCount",
          "HBondAcceptorCount",
          "RotatableBondCount",
          "HeavyAtomCount",
          "IsotopeAtomCount",
          "AtomStereoCount",
          "DefinedAtomStereoCount",
          "UndefinedAtomStereoCount",
          "BondStereoCount",
          "DefinedBondStereoCount",
          "UndefinedBondStereoCount",
          "CovalentUnitCount",
          "Volume3D"
        ]
      }
    }
  },
  "required": [
    "searchType"
  ],
  "additionalProperties": false
}
view source ↗

pubchem_get_compound_details

open-world

Get detailed compound information by CID. Returns physicochemical properties (molecular weight, SMILES, InChIKey, XLogP, TPSA, etc.), optionally with a textual description (pharmacology, mechanism, therapeutic use), all known synonyms, drug-likeness assessment (Lipinski/Veber rules), and/or pharmacological classification (FDA classes, MeSH classes, ATC codes). Efficiently batches up to 100 CIDs.

read
invocation
{
  "jsonrpc": "2.0",
  "id": 1,
  "method": "tools/call",
  "params": {
    "name": "pubchem_get_compound_details",
    "arguments": {
      "cids": "<cids>"
    }
  }
}
schema
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "type": "object",
  "properties": {
    "cids": {
      "minItems": 1,
      "maxItems": 100,
      "type": "array",
      "items": {
        "type": "integer",
        "exclusiveMinimum": 0,
        "maximum": 9007199254740991
      },
      "description": "PubChem Compound IDs to fetch (1-100). Batched efficiently. Resolve from names/SMILES with pubchem_search_compounds."
    },
    "properties": {
      "description": "Properties to retrieve. Defaults to a core set: MolecularFormula, MolecularWeight, IUPACName, CanonicalSMILES, IsomericSMILES, InChIKey, XLogP, TPSA, HBondDonorCount, HBondAcceptorCount, RotatableBondCount, HeavyAtomCount, Charge, Complexity.",
      "type": "array",
      "items": {
        "type": "string",
        "enum": [
          "MolecularFormula",
          "MolecularWeight",
          "CanonicalSMILES",
          "IsomericSMILES",
          "InChI",
          "InChIKey",
          "IUPACName",
          "Title",
          "XLogP",
          "ExactMass",
          "MonoisotopicMass",
          "TPSA",
          "Complexity",
          "Charge",
          "HBondDonorCount",
          "HBondAcceptorCount",
          "RotatableBondCount",
          "HeavyAtomCount",
          "IsotopeAtomCount",
          "AtomStereoCount",
          "DefinedAtomStereoCount",
          "UndefinedAtomStereoCount",
          "BondStereoCount",
          "DefinedBondStereoCount",
          "UndefinedBondStereoCount",
          "CovalentUnitCount",
          "Volume3D"
        ]
      }
    },
    "includeDescription": {
      "default": false,
      "description": "Include textual descriptions (pharmacology, mechanism, therapeutic use) attributed by source. Well-studied compounds have many overlapping summaries — capped via maxDescriptions. Fetched only for the first 10 CIDs in the batch; remaining CIDs return without descriptions.",
      "type": "boolean"
    },
    "maxDescriptions": {
      "default": 3,
      "description": "Max number of distinct description entries per compound (1-20). PubChem returns near-duplicate summaries from many depositors; we dedup and cap to keep responses focused. Default: 3.",
      "type": "integer",
      "minimum": 1,
      "maximum": 20
    },
    "includeSynonyms": {
      "default": false,
      "description": "Fetch all known names and synonyms (trade names, systematic names, registry numbers). One API call per CID — slower than the property batch for large CID lists.",
      "type": "boolean"
    },
    "includeDrugLikeness": {
      "default": false,
      "description": "Compute drug-likeness assessment: Lipinski Rule of Five (MW, XLogP, HBD, HBA) and Veber rules (TPSA, rotatable bonds). No extra API calls — computed from properties.",
      "type": "boolean"
    },
    "includeClassification": {
      "default": false,
      "description": "Include pharmacological classification: FDA Established Pharmacologic Classes, mechanisms of action, MeSH classes, and ATC codes. Fetched only for the first 10 CIDs in the batch; remaining CIDs return without classification.",
      "type": "boolean"
    }
  },
  "required": [
    "cids"
  ],
  "additionalProperties": false
}
view source ↗

pubchem_get_compound_image

open-world

Fetch a 2D structure diagram (PNG image) for a compound by CID.

read
invocation
{
  "jsonrpc": "2.0",
  "id": 1,
  "method": "tools/call",
  "params": {
    "name": "pubchem_get_compound_image",
    "arguments": {
      "cid": "<cid>"
    }
  }
}
schema
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "type": "object",
  "properties": {
    "cid": {
      "type": "integer",
      "exclusiveMinimum": 0,
      "maximum": 9007199254740991,
      "description": "PubChem Compound ID. Resolve from name/SMILES with pubchem_search_compounds."
    },
    "size": {
      "default": "large",
      "description": "Image size: \"small\" (100x100) or \"large\" (300x300). Default: \"large\".",
      "type": "string",
      "enum": [
        "small",
        "large"
      ]
    }
  },
  "required": [
    "cid"
  ],
  "additionalProperties": false
}
view source ↗

pubchem_get_compound_xrefs

open-world

Get external database cross-references for a compound: PubMed citations, patent IDs, gene/protein associations, registry numbers, and taxonomy IDs. Results are capped per type with total counts reported.

read
invocation
{
  "jsonrpc": "2.0",
  "id": 1,
  "method": "tools/call",
  "params": {
    "name": "pubchem_get_compound_xrefs",
    "arguments": {
      "cid": "<cid>",
      "xrefTypes": "<xrefTypes>"
    }
  }
}
schema
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "type": "object",
  "properties": {
    "cid": {
      "type": "integer",
      "exclusiveMinimum": 0,
      "maximum": 9007199254740991,
      "description": "PubChem Compound ID. Resolve from name/SMILES with pubchem_search_compounds."
    },
    "xrefTypes": {
      "minItems": 1,
      "type": "array",
      "items": {
        "type": "string",
        "enum": [
          "RegistryID",
          "RN",
          "PubMedID",
          "PatentID",
          "GeneID",
          "ProteinGI",
          "TaxonomyID"
        ]
      },
      "description": "Cross-reference types to retrieve. String IDs: RegistryID (DSSTox/EPA registry numbers), RN (CAS numbers), PatentID. Numeric IDs: PubMedID, GeneID (NCBI Gene), ProteinGI (legacy NCBI Protein GI), TaxonomyID."
    },
    "maxPerType": {
      "default": 50,
      "description": "Max IDs to return per xref type (1-500). A compound may have thousands of PubMed references. Total count always reported. Default: 50.",
      "type": "number",
      "minimum": 1,
      "maximum": 500
    }
  },
  "required": [
    "cid",
    "xrefTypes"
  ],
  "additionalProperties": false
}
view source ↗

pubchem_get_compound_safety

open-world

Get GHS (Globally Harmonized System) hazard classification and safety data for a compound. Returns signal word, pictograms, hazard statements (H-codes), and precautionary statements (P-codes). Data sourced from PubChem depositors — source attribution included.

read
invocation
{
  "jsonrpc": "2.0",
  "id": 1,
  "method": "tools/call",
  "params": {
    "name": "pubchem_get_compound_safety",
    "arguments": {
      "cid": "<cid>"
    }
  }
}
schema
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "type": "object",
  "properties": {
    "cid": {
      "type": "integer",
      "exclusiveMinimum": 0,
      "maximum": 9007199254740991,
      "description": "PubChem Compound ID. Resolve from name/SMILES with pubchem_search_compounds."
    }
  },
  "required": [
    "cid"
  ],
  "additionalProperties": false
}
view source ↗

pubchem_get_bioactivity

open-world

Get a compound's bioactivity profile: which assays tested it, activity outcomes (Active/Inactive/Inconclusive), target identifiers (NCBI Gene ID, UniProt/GenBank accession), and quantitative values (IC50, EC50, Ki, etc.). Filter by outcome to focus on active results.

read
invocation
{
  "jsonrpc": "2.0",
  "id": 1,
  "method": "tools/call",
  "params": {
    "name": "pubchem_get_bioactivity",
    "arguments": {
      "cid": "<cid>"
    }
  }
}
schema
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "type": "object",
  "properties": {
    "cid": {
      "type": "integer",
      "exclusiveMinimum": 0,
      "maximum": 9007199254740991,
      "description": "PubChem Compound ID. Resolve from name/SMILES with pubchem_search_compounds."
    },
    "outcomeFilter": {
      "default": "all",
      "description": "Filter by activity outcome. \"active\" shows only assays where the compound showed activity — most useful for understanding biological profile. Default: \"all\".",
      "type": "string",
      "enum": [
        "active",
        "inactive",
        "all"
      ]
    },
    "maxResults": {
      "default": 20,
      "description": "Max assay results to return (1-100). Well-studied compounds have thousands of records. Default: 20.",
      "type": "number",
      "minimum": 1,
      "maximum": 100
    }
  },
  "required": [
    "cid"
  ],
  "additionalProperties": false
}
view source ↗

pubchem_search_assays

open-world

Find PubChem bioassays associated with a biological target. Search by gene symbol (e.g. "EGFR"), protein name, NCBI Gene ID, or UniProt accession. Returns assay IDs (AIDs) which can be explored further with pubchem_get_summary.

read
invocation
{
  "jsonrpc": "2.0",
  "id": 1,
  "method": "tools/call",
  "params": {
    "name": "pubchem_search_assays",
    "arguments": {
      "targetType": "<targetType>",
      "targetQuery": "<targetQuery>"
    }
  }
}
schema
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "type": "object",
  "properties": {
    "targetType": {
      "type": "string",
      "enum": [
        "genesymbol",
        "proteinname",
        "geneid",
        "proteinaccession"
      ],
      "description": "Target identifier type. \"genesymbol\" and \"proteinname\" accept text names. \"geneid\" accepts NCBI Gene IDs. \"proteinaccession\" accepts UniProt accessions."
    },
    "targetQuery": {
      "type": "string",
      "description": "Target identifier. Examples: \"EGFR\" (genesymbol), \"Epidermal growth factor receptor\" (proteinname), \"1956\" (geneid), \"P00533\" (proteinaccession)."
    },
    "maxResults": {
      "default": 50,
      "description": "Max AIDs to return (1-200). Popular targets may have thousands of assays. Default: 50.",
      "type": "number",
      "minimum": 1,
      "maximum": 200
    }
  },
  "required": [
    "targetType",
    "targetQuery"
  ],
  "additionalProperties": false
}
view source ↗

pubchem_get_summary

open-world

Get descriptive summaries for PubChem entities by ID. Supports assays (AID), genes (Gene ID), proteins (UniProt accession), and taxonomy (Tax ID). Up to 10 per call.

read
invocation
{
  "jsonrpc": "2.0",
  "id": 1,
  "method": "tools/call",
  "params": {
    "name": "pubchem_get_summary",
    "arguments": {
      "entityType": "<entityType>",
      "identifiers": "<identifiers>"
    }
  }
}
schema
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "type": "object",
  "properties": {
    "entityType": {
      "type": "string",
      "enum": [
        "assay",
        "gene",
        "protein",
        "taxonomy"
      ],
      "description": "Entity type. Determines ID format and returned fields."
    },
    "identifiers": {
      "minItems": 1,
      "maxItems": 10,
      "type": "array",
      "items": {
        "anyOf": [
          {
            "type": "string",
            "description": "String identifier (e.g. UniProt accession)."
          },
          {
            "type": "number",
            "description": "Numeric identifier (e.g. AID, Gene ID, Tax ID)."
          }
        ],
        "description": "Entity identifier — string or number depending on entityType."
      },
      "description": "Entity identifiers (1-10). Type depends on entityType:\n- assay: AID (number), e.g. [1000]\n- gene: Gene ID (number), e.g. [1956]\n- protein: UniProt accession (string), e.g. [\"P00533\"]\n- taxonomy: Tax ID (number), e.g. [9606]"
    }
  },
  "required": [
    "entityType",
    "identifiers"
  ],
  "additionalProperties": false
}
view source ↗