/**
 * Propagates campaign (UTM) parameters onto outbound links.
 *
 * For every <a> whose host is `utmInheritingDomain` (or a subdomain of it),
 * any existing `utm_<letters>=...` query pairs are removed and the three
 * pairs listed in `utms` are appended. The URL fragment and all non-UTM
 * query parameters are preserved. Links to other hosts are not modified.
 *
 * NOTE(review): the {{URL - ...}} tokens look like tag-manager template
 * variables that are substituted before this script executes - confirm.
 * The code is kept ES5-only on that assumption.
 */
(function () {
  var utmInheritingDomain = "appstore.com";

  // Matches one whole "name=value" query pair whose name is utm_<letters>.
  // It is applied per pair, so values containing "-", "_", "%" or "." are
  // removed in full and the leading "?" of the query is never consumed.
  var utmPairRegExp = /^utm_[A-Za-z]+=/i;

  var utms = [
    "utm_medium={{URL - utm_medium}}",
    "utm_source={{URL - utm_source}}",
    "utm_campaign={{URL - utm_campaign}}"
  ];

  /**
   * Tells whether a host name is the inheriting domain or one of its
   * subdomains. Comparing the host (instead of searching the whole URL)
   * avoids matching the domain inside a path, a query value or a
   * look-alike host.
   *
   * @param {string} hostname - Host name of the link, without port.
   * @returns {boolean} True when the link should receive the UTM pairs.
   */
  function isInheritingHost(hostname) {
    var suffix = "." + utmInheritingDomain;
    return hostname === utmInheritingDomain ||
      hostname.slice(-suffix.length) === suffix;
  }

  /**
   * Builds the new query string for a matching link.
   *
   * @param {string} search - Current query string, either "" or "?a=b&...".
   * @returns {string} Query string (with leading "?") holding the original
   *   non-UTM pairs followed by the pairs from `utms`.
   */
  function withInheritedUtms(search) {
    var pairs = search.replace(/^\?/, "").split("&");
    var kept = [];
    for (var i = 0; i < pairs.length; i += 1) {
      // Drop empty segments ("?", "a=1&&b=2") and existing utm pairs.
      if (pairs[i] !== "" && !utmPairRegExp.test(pairs[i])) {
        kept.push(pairs[i]);
      }
    }
    return "?" + kept.concat(utms).join("&");
  }

  var links = document.getElementsByTagName("a");
  for (var index = 0; index < links.length; index += 1) {
    var link = links[index];

    // Skip anchors without a usable string href: placeholder anchors report
    // "", and SVG <a> elements expose href as an object rather than a string.
    if (typeof link.href !== "string" || link.href === "") {
      continue;
    }
    if (!isInheritingHost(link.hostname)) {
      continue;
    }

    // Assigning `search` rewrites only the query part of the href, so the
    // fragment (#...) is left exactly as it was.
    link.search = withInheritedUtms(link.search);
  }
}());

Supercharge generative AI
for any scale and environment

Friendli Serverless Endpoints

Fast and affordable API for open-source models

Compare plans and features

Category | Features | Trial | Basic | Enterprise
Inference: OpenAI-compatible APIs
Optimized inference APIs
Long context (128K) handling
Function calling & JSON mode
Rate limit
5k requests/min, 50k tokens/min
50k requests/min, 100k tokens/min
(Contact sales to increase limits)
Unlimited
Tools: Document parsing
Web search
Code interpreter
Other built-in tools

Inference

OpenAI compatible APIs

Trial
Basic
Enterprise

Optimized inference APIs

Trial
Basic
Enterprise

Long context (128K) handling

Trial
Basic
Enterprise

Function calling & JSON mode

Trial
Basic
Enterprise

Rate limit

Trial
5k requests/min, 50k tokens/min
Basic
50k requests/min, 100k tokens/min
(Contact sales to increase limits)
Enterprise
Unlimited

Tools

Document parsing

Trial
Basic
Enterprise

Web search

Trial
Basic
Enterprise

Code interpreter

Trial
Basic
Enterprise

Other built-in tools

Trial
Basic
Enterprise

Pricing

Pricing details

Model name

Price per unit

Llama 3.1 8B Instruct

$0.1/1M tokens

Llama 3.1 70B Instruct

$0.6/1M tokens

Mixtral 8x7B Instruct v0.1

$0.4/1M tokens