Skip to main content
PATCH
/
endpoints
/
{endpointId}
Update an endpoint
curl --request PATCH \
  --url https://rest.runpod.io/v1/endpoints/{endpointId} \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "allowedCudaVersions": [
    "12.8"
  ],
  "cpuFlavorIds": [
    "cpu3c"
  ],
  "dataCenterIds": [
    "EU-RO-1",
    "CA-MTL-1"
  ],
  "executionTimeoutMs": 600000,
  "flashboot": true,
  "gpuCount": 1,
  "gpuTypeIds": [
    "NVIDIA GeForce RTX 4090"
  ],
  "idleTimeout": 5,
  "name": "<string>",
  "networkVolumeId": "<string>",
  "scalerType": "QUEUE_DELAY",
  "scalerValue": 4,
  "templateId": "30zmvf89kd",
  "vcpuCount": 2,
  "workersMax": 3,
  "workersMin": 0
}
'
{
  "allowedCudaVersions": [
    "12.8"
  ],
  "computeType": "GPU",
  "createdAt": "2024-07-12T19:14:40.144Z",
  "dataCenterIds": "EU-NL-1,EU-RO-1,EU-SE-1",
  "env": {
    "ENV_VAR": "value"
  },
  "executionTimeoutMs": 600000,
  "gpuCount": 1,
  "gpuTypeIds": [
    "NVIDIA GeForce RTX 4090"
  ],
  "id": "jpnw0v75y3qoql",
  "idleTimeout": 5,
  "instanceIds": [
    "cpu3c-8-16"
  ],
  "name": "my endpoint",
  "networkVolumeId": "agv6w2qcg7",
  "scalerType": "QUEUE_DELAY",
  "scalerValue": 4,
  "template": {
    "category": "NVIDIA",
    "containerDiskInGb": 50,
    "containerRegistryAuthId": "<string>",
    "dockerEntrypoint": [],
    "dockerStartCmd": [],
    "earned": 100,
    "env": {
      "ENV_VAR": "value"
    },
    "id": "30zmvf89kd",
    "imageName": "runpod/pytorch:2.1.0-py3.10-cuda11.8.0-devel-ubuntu22.04",
    "isPublic": false,
    "isRunpod": true,
    "isServerless": true,
    "name": "my template",
    "ports": [
      "8888/http",
      "22/tcp"
    ],
    "readme": "<string>",
    "runtimeInMin": 123,
    "volumeInGb": 20,
    "volumeMountPath": "/workspace"
  },
  "templateId": "30zmvf89kd",
  "userId": "user_2PyTJrLzeuwfZilRZ7JhCQDuSqo",
  "version": 0,
  "workers": [
    {
      "adjustedCostPerHr": 0.69,
      "aiApiId": null,
      "consumerUserId": "user_2PyTJrLzeuwfZilRZ7JhCQDuSqo",
      "containerDiskInGb": 50,
      "containerRegistryAuthId": "clzdaifot0001l90809257ynb",
      "costPerHr": "0.74",
      "cpuFlavorId": "cpu3c",
      "desiredStatus": "RUNNING",
      "dockerEntrypoint": [
        "<string>"
      ],
      "dockerStartCmd": [
        "<string>"
      ],
      "endpointId": null,
      "env": {
        "ENV_VAR": "value"
      },
      "gpu": {
        "id": "<string>",
        "count": 1,
        "displayName": "<string>",
        "securePrice": 123,
        "communityPrice": 123,
        "oneMonthPrice": 123,
        "threeMonthPrice": 123,
        "sixMonthPrice": 123,
        "oneWeekPrice": 123,
        "communitySpotPrice": 123,
        "secureSpotPrice": 123
      },
      "id": "xedezhzb9la3ye",
      "image": "runpod/pytorch:2.1.0-py3.10-cuda11.8.0-devel-ubuntu22.04",
      "interruptible": false,
      "lastStartedAt": "2024-07-12T19:14:40.144Z",
      "lastStatusChange": "Rented by User: Fri Jul 12 2024 15:14:40 GMT-0400 (Eastern Daylight Time)",
      "locked": false,
      "machine": {
        "minPodGpuCount": 123,
        "gpuTypeId": "<string>",
        "gpuType": {
          "id": "<string>",
          "count": 1,
          "displayName": "<string>",
          "securePrice": 123,
          "communityPrice": 123,
          "oneMonthPrice": 123,
          "threeMonthPrice": 123,
          "sixMonthPrice": 123,
          "oneWeekPrice": 123,
          "communitySpotPrice": 123,
          "secureSpotPrice": 123
        },
        "cpuCount": 123,
        "cpuTypeId": "<string>",
        "cpuType": {
          "id": "<string>",
          "displayName": "<string>",
          "cores": 123,
          "threadsPerCore": 123,
          "groupId": "<string>"
        },
        "location": "<string>",
        "dataCenterId": "<string>",
        "diskThroughputMBps": 123,
        "maxDownloadSpeedMbps": 123,
        "maxUploadSpeedMbps": 123,
        "supportPublicIp": true,
        "secureCloud": true,
        "maintenanceStart": "<string>",
        "maintenanceEnd": "<string>",
        "maintenanceNote": "<string>",
        "note": "<string>",
        "costPerHr": 123,
        "currentPricePerGpu": 123,
        "gpuAvailable": 123,
        "gpuDisplayName": "<string>"
      },
      "machineId": "s194cr8pls2z",
      "memoryInGb": 62,
      "name": "<string>",
      "networkVolume": {
        "id": "agv6w2qcg7",
        "name": "my network volume",
        "size": 50,
        "dataCenterId": "EU-RO-1"
      },
      "portMappings": {
        "22": 10341
      },
      "ports": [
        "8888/http",
        "22/tcp"
      ],
      "publicIp": "100.65.0.119",
      "savingsPlans": [
        {
          "costPerHr": 0.21,
          "endTime": "2024-07-12T19:14:40.144Z",
          "gpuTypeId": "NVIDIA GeForce RTX 4090",
          "id": "clkrb4qci0000mb09c7sualzo",
          "podId": "xedezhzb9la3ye",
          "startTime": "2024-05-12T19:14:40.144Z"
        }
      ],
      "slsVersion": 0,
      "templateId": null,
      "vcpuCount": 24,
      "volumeEncrypted": false,
      "volumeInGb": 20,
      "volumeMountPath": "/workspace"
    }
  ],
  "workersMax": 3,
  "workersMin": 0
}

Authorizations

Authorization
string
header
required

Bearer authentication header of the form Bearer <token>, where <token> is your auth token.

Path Parameters

endpointId
string
required

ID of endpoint that needs to be updated.

Body

application/json

Update an endpoint.

Input for updating an endpoint, which will trigger a rolling release on the endpoint.

allowedCudaVersions
enum<string>[]

If the created Serverless endpoint is a GPU endpoint, a list of acceptable CUDA versions on the created workers. If not set, any CUDA version is acceptable.

Available options:
12.8,
12.7,
12.6,
12.5,
12.4,
12.3,
12.2,
12.1,
12.0,
11.8
cpuFlavorIds
enum<string>[]

If the created Serverless endpoint is a CPU endpoint, a list of Runpod CPU flavors which can be attached to the created workers. The order of the list determines the order to rent CPU flavors.

Available options:
cpu3c,
cpu3g,
cpu5c,
cpu5g
dataCenterIds
enum<string>[]

A list of Runpod data center IDs where workers on the created Serverless endpoint can be located.

Available options:
EU-RO-1,
CA-MTL-1,
EU-SE-1,
US-IL-1,
EUR-IS-1,
EU-CZ-1,
US-TX-3,
EUR-IS-2,
US-KS-2,
US-GA-2,
US-WA-1,
US-TX-1,
CA-MTL-3,
EU-NL-1,
US-TX-4,
US-CA-2,
US-NC-1,
OC-AU-1,
US-DE-1,
EUR-IS-3,
CA-MTL-2,
AP-JP-1,
EUR-NO-1,
EU-FR-1,
US-KS-3,
US-GA-1
Example:
["EU-RO-1", "CA-MTL-1"]
executionTimeoutMs
integer

The maximum number of milliseconds an individual request can run on a Serverless endpoint before the worker is stopped and the request is marked as failed.

Example:

600000

flashboot
boolean

Whether to use flash boot for the created Serverless endpoint.

Example:

true

gpuCount
integer
default:1

If the created Serverless endpoint is a GPU endpoint, the number of GPUs attached to each worker on the endpoint.

Required range: x >= 1
gpuTypeIds
enum<string>[]

If the created Serverless endpoint is a GPU endpoint, a list of Runpod GPU types which can be attached to the created workers. The order of the list determines the order to rent GPU types.

Available options:
NVIDIA GeForce RTX 4090,
NVIDIA A40,
NVIDIA RTX A5000,
NVIDIA GeForce RTX 3090,
NVIDIA RTX A4500,
NVIDIA RTX A6000,
NVIDIA L40S,
NVIDIA L4,
NVIDIA H100 80GB HBM3,
NVIDIA RTX 4000 Ada Generation,
NVIDIA A100 80GB PCIe,
NVIDIA A100-SXM4-80GB,
NVIDIA RTX A4000,
NVIDIA RTX 6000 Ada Generation,
NVIDIA RTX 2000 Ada Generation,
NVIDIA H200,
NVIDIA L40,
NVIDIA H100 NVL,
NVIDIA H100 PCIe,
NVIDIA GeForce RTX 3080 Ti,
NVIDIA GeForce RTX 3080,
NVIDIA GeForce RTX 3070,
Tesla V100-PCIE-16GB,
AMD Instinct MI300X OAM,
NVIDIA RTX A2000,
Tesla V100-FHHL-16GB,
NVIDIA GeForce RTX 4080 SUPER,
Tesla V100-SXM2-16GB,
NVIDIA GeForce RTX 4070 Ti,
Tesla V100-SXM2-32GB,
NVIDIA RTX 4000 SFF Ada Generation,
NVIDIA RTX 5000 Ada Generation,
NVIDIA GeForce RTX 5090,
NVIDIA A30,
NVIDIA GeForce RTX 4080,
NVIDIA GeForce RTX 5080,
NVIDIA GeForce RTX 3090 Ti,
NVIDIA B200
idleTimeout
integer
default:5

The number of seconds a worker on the created Serverless endpoint can run without taking a job before the worker is scaled down.

Required range: 1 <= x <= 3600
name
string

A user-defined name for the created Serverless endpoint. The name does not need to be unique.

Maximum string length: 191
networkVolumeId
string

The unique string identifying the network volume to attach to the created Serverless endpoint.

scalerType
enum<string>
default:QUEUE_DELAY

The method used to scale up workers on the created Serverless endpoint. If QUEUE_DELAY, workers are scaled based on a periodic check to see if any requests have been in queue for too long. If REQUEST_COUNT, the desired number of workers is periodically calculated based on the number of requests in the endpoint's queue. Use QUEUE_DELAY if you need to ensure requests take no longer than a maximum latency, and use REQUEST_COUNT if you need to scale based on the number of requests.

Available options:
QUEUE_DELAY,
REQUEST_COUNT
scalerValue
integer
default:4

If the endpoint scalerType is QUEUE_DELAY, the number of seconds a request can remain in queue before a new worker is scaled up. If the endpoint scalerType is REQUEST_COUNT, the number of workers is increased as needed to meet the number of requests in the endpoint's queue divided by scalerValue.

Required range: x >= 1
templateId
string

The unique string identifying the template used to create the Serverless endpoint.

Example:

"30zmvf89kd"

vcpuCount
integer
default:2

If the created Serverless endpoint is a CPU endpoint, the number of vCPUs allocated to each created worker.

workersMax
integer

The maximum number of workers that can be running at the same time on a Serverless endpoint.

Required range: x >= 0
Example:

3

workersMin
integer

The minimum number of workers that will run at the same time on a Serverless endpoint. This number of workers will always stay running for the endpoint, and will be charged even if no requests are being processed, but they are charged at a lower rate than running autoscaling workers.

Required range: x >= 0
Example:

0

Response

Successful operation.

allowedCudaVersions
enum<string>[]

A list of acceptable CUDA versions for the workers on a Serverless endpoint. If not set, any CUDA version is acceptable.

Available options:
12.8,
12.7,
12.6,
12.5,
12.4,
12.3,
12.2,
12.1,
12.0,
11.8
computeType
enum<string>

The type of compute used by workers on a Serverless endpoint.

Available options:
CPU,
GPU
Example:

"GPU"

createdAt
string

The UTC timestamp when a Serverless endpoint was created.

Example:

"2024-07-12T19:14:40.144Z"

dataCenterIds
enum<string>[]

A list of Runpod data center IDs where workers on a Serverless endpoint can be located.

Available options:
EU-RO-1,
CA-MTL-1,
EU-SE-1,
US-IL-1,
EUR-IS-1,
EU-CZ-1,
US-TX-3,
EUR-IS-2,
US-KS-2,
US-GA-2,
US-WA-1,
US-TX-1,
CA-MTL-3,
EU-NL-1,
US-TX-4,
US-CA-2,
US-NC-1,
OC-AU-1,
US-DE-1,
EUR-IS-3,
CA-MTL-2,
AP-JP-1,
EUR-NO-1,
EU-FR-1,
US-KS-3,
US-GA-1
Example:

["EU-NL-1", "EU-RO-1", "EU-SE-1"]

env
object
Example:
{ "ENV_VAR": "value" }
executionTimeoutMs
integer

The maximum number of milliseconds an individual request can run on a Serverless endpoint before the worker is stopped and the request is marked as failed.

Example:

600000

gpuCount
integer

The number of GPUs attached to each worker on a Serverless endpoint.

Example:

1

gpuTypeIds
enum<string>[]

A list of Runpod GPU types which can be attached to a Serverless endpoint.

Available options:
NVIDIA GeForce RTX 4090,
NVIDIA A40,
NVIDIA RTX A5000,
NVIDIA GeForce RTX 3090,
NVIDIA RTX A4500,
NVIDIA RTX A6000,
NVIDIA L40S,
NVIDIA L4,
NVIDIA H100 80GB HBM3,
NVIDIA RTX 4000 Ada Generation,
NVIDIA A100 80GB PCIe,
NVIDIA A100-SXM4-80GB,
NVIDIA RTX A4000,
NVIDIA RTX 6000 Ada Generation,
NVIDIA RTX 2000 Ada Generation,
NVIDIA H200,
NVIDIA L40,
NVIDIA H100 NVL,
NVIDIA H100 PCIe,
NVIDIA GeForce RTX 3080 Ti,
NVIDIA GeForce RTX 3080,
NVIDIA GeForce RTX 3070,
Tesla V100-PCIE-16GB,
AMD Instinct MI300X OAM,
NVIDIA RTX A2000,
Tesla V100-FHHL-16GB,
NVIDIA GeForce RTX 4080 SUPER,
Tesla V100-SXM2-16GB,
NVIDIA GeForce RTX 4070 Ti,
Tesla V100-SXM2-32GB,
NVIDIA RTX 4000 SFF Ada Generation,
NVIDIA RTX 5000 Ada Generation,
NVIDIA GeForce RTX 5090,
NVIDIA A30,
NVIDIA GeForce RTX 4080,
NVIDIA GeForce RTX 5080,
NVIDIA GeForce RTX 3090 Ti,
NVIDIA B200
id
string

A unique string identifying a Serverless endpoint.

Example:

"jpnw0v75y3qoql"

idleTimeout
integer

The number of seconds a worker on a Serverless endpoint can run without taking a job before the worker is scaled down.

Example:

5

instanceIds
string[]

For CPU Serverless endpoints, a list of instance IDs that can be attached to a Serverless endpoint.

Example:
["cpu3c-8-16"]
name
string

A user-defined name for a Serverless endpoint. The name does not need to be unique.

Example:

"my endpoint"

networkVolumeId
string

The unique string identifying the network volume to attach to the Serverless endpoint.

Example:

"agv6w2qcg7"

scalerType
enum<string>

The method used to scale up workers on a Serverless endpoint. If QUEUE_DELAY, workers are scaled based on a periodic check to see if any requests have been in queue for too long. If REQUEST_COUNT, the desired number of workers is periodically calculated based on the number of requests in the endpoint's queue. Use QUEUE_DELAY if you need to ensure requests take no longer than a maximum latency, and use REQUEST_COUNT if you need to scale based on the number of requests.

Available options:
QUEUE_DELAY,
REQUEST_COUNT
Example:

"QUEUE_DELAY"

scalerValue
integer

If the endpoint scalerType is QUEUE_DELAY, the number of seconds a request can remain in queue before a new worker is scaled up. If the endpoint scalerType is REQUEST_COUNT, the number of workers is increased as needed to meet the number of requests in the endpoint's queue divided by scalerValue.

Example:

4

template
object
templateId
string

The unique string identifying the template used to create a Serverless endpoint.

Example:

"30zmvf89kd"

userId
string

A unique string identifying the Runpod user who created a Serverless endpoint.

Example:

"user_2PyTJrLzeuwfZilRZ7JhCQDuSqo"

version
integer

The latest version of a Serverless endpoint, which is updated whenever the template or environment variables of the endpoint are changed.

Example:

0

workers
object[]

Information about current workers on a Serverless endpoint.

workersMax
integer

The maximum number of workers that can be running at the same time on a Serverless endpoint.

Example:

3

workersMin
integer

The minimum number of workers that will run at the same time on a Serverless endpoint. This number of workers will always stay running for the endpoint, and will be charged even if no requests are being processed, but they are charged at a lower rate than running autoscaling workers.

Example:

0