  • June 12, 2024
  • 5 min read

Deploying Your Inference Endpoints on AWS SageMaker with Friendli Container


This blog post will guide you through creating an Amazon SageMaker Model from model artifacts stored in an S3 bucket and a Friendli ECR container image. We'll then configure and deploy this model as a SageMaker Endpoint for real-time inference. You can then invoke the endpoint with your prompts and receive generative AI inference responses.

By utilizing Friendli Containers in your SageMaker pipeline, you'll benefit from the Friendli Engine's speed and resource efficiency. We'll explore how to create inference endpoints using both the AWS Console and the boto3 Python SDK.
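As a preview of the boto3 path, here is a minimal sketch of the three API calls involved. The model name, S3 artifact path, ECR image URI, environment variables, IAM role, and instance type below are placeholder assumptions; substitute the values for your own account and model:

```python
import boto3

sagemaker = boto3.client("sagemaker", region_name="us-east-1")

# 1. Register a SageMaker Model: the Friendli container image from ECR
#    plus the model artifacts stored in S3. All names/ARNs are placeholders.
sagemaker.create_model(
    ModelName="friendli-model",
    PrimaryContainer={
        "Image": "123456789012.dkr.ecr.us-east-1.amazonaws.com/friendli-container:latest",  # hypothetical ECR image URI
        "ModelDataUrl": "s3://my-bucket/models/model.tar.gz",  # hypothetical S3 artifact path
        "Environment": {"EXAMPLE_ENV_VAR": "value"},  # container-specific settings (placeholder)
    },
    ExecutionRoleArn="arn:aws:iam::123456789012:role/SageMakerExecutionRole",
)

# 2. Create an endpoint configuration: one GPU instance serving the model.
sagemaker.create_endpoint_config(
    EndpointConfigName="friendli-endpoint-config",
    ProductionVariants=[
        {
            "VariantName": "AllTraffic",
            "ModelName": "friendli-model",
            "InstanceType": "ml.g5.xlarge",  # assumed GPU instance type
            "InitialInstanceCount": 1,
        }
    ],
)

# 3. Deploy the endpoint (provisioning typically takes several minutes).
sagemaker.create_endpoint(
    EndpointName="friendli-endpoint",
    EndpointConfigName="friendli-endpoint-config",
)
```

Once the endpoint is InService, you invoke it through the SageMaker Runtime client. The request payload shape shown here is an assumption; the exact schema depends on the Friendli Container's API:

```python
import json

runtime = boto3.client("sagemaker-runtime", region_name="us-east-1")

response = runtime.invoke_endpoint(
    EndpointName="friendli-endpoint",
    ContentType="application/json",
    Body=json.dumps({"prompt": "Say hello!", "max_tokens": 64}),  # assumed payload shape
)
print(response["Body"].read().decode("utf-8"))
```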

The General Workflow