LatentForce-ai/Cassini-1 API & Inference Endpoint

Prerequisites

1. Install dependencies

bash
pip install vllm openai pyyaml

2. Download the prompt template

The extraction prompt is shipped in this repository as python.yaml and javascript.yaml (the latter covers both JavaScript and TypeScript). Clone the repository or download the prompt file before running inference:

bash
# Clone the full repo
git clone https://huggingface.co/LatentForce-ai/Cassini-1

# Or download the prompt file only
wget https://huggingface.co/LatentForce-ai/Cassini-1/resolve/main/python.yaml

3. Start the vLLM server

In a separate terminal, serve the model:

bash
vllm serve LatentForce-ai/Cassini-1 --max-model-len 20480

The server will be available at http://127.0.0.1:8000 by default. Allow 1–2 minutes for the model to load before sending requests.

Minimal Inference

The following snippet runs inference on a single source file and prints the extracted JSON. Point SOURCE_FILE at any .py file and run the script.

python
import json
import yaml
from openai import OpenAI

# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------

# Base URL handed to the OpenAI client; the server from step 3 listens on
# http://127.0.0.1:8000 by default.
VLLM_URL    = "http://127.0.0.1:8000/v1"
# Model name sent with each request (same identifier passed to `vllm serve`).
MODEL_ID    = "LatentForce-ai/Cassini-1"
PROMPT_YAML = "python.yaml"           # path to the prompt template from this repo
SOURCE_FILE = "main.py"  # path to the source file to analyze
# Upper bound on generated tokens, passed as `max_tokens` to the completion call.
MAX_TOKENS  = 6144

# ---------------------------------------------------------------------------
# Load prompt template
# ---------------------------------------------------------------------------

# Read the template explicitly as UTF-8 so the result does not depend on the
# platform's default locale encoding.
with open(PROMPT_YAML, encoding="utf-8") as f:
    config = yaml.safe_load(f)

# The template text lives under the top-level "prompt" key of the YAML file.
prompt_template = config["prompt"]

# ---------------------------------------------------------------------------
# Prepare prompt
# ---------------------------------------------------------------------------

# Decode the source explicitly as UTF-8 (Python's default source encoding)
# instead of relying on the platform locale, which breaks on non-ASCII files
# on systems whose default is not UTF-8.
with open(SOURCE_FILE, encoding="utf-8") as f:
    source_code = f.read()

# Fill the two template slots. The file path is substituted first so that a
# literal "FILEPATH_PLACEHOLDER" inside the analyzed source is left untouched.
prompt = (
    prompt_template
    .replace("FILEPATH_PLACEHOLDER", SOURCE_FILE)
    .replace("CONTENT_PLACEHOLDER", source_code)
)

# ---------------------------------------------------------------------------
# Run inference
# ---------------------------------------------------------------------------

# Placeholder key: the server in this guide is started without an --api-key flag.
client = OpenAI(base_url=VLLM_URL, api_key="no-key")

# Single-turn request carrying the filled-in prompt; temperature 0 and
# thinking disabled via the chat template kwargs.
response = client.chat.completions.create(
    model=MODEL_ID,
    messages=[{"role": "user", "content": prompt}],
    temperature=0,
    max_tokens=MAX_TOKENS,
    extra_body={"chat_template_kwargs": {"enable_thinking": False}},
)

# ---------------------------------------------------------------------------
# Parse output
# ---------------------------------------------------------------------------

# `content` is optional in the SDK's response type; normalise a missing value
# to "" so the parsing and printing below always receive a string.
raw = response.choices[0].message.content or ""


def parse_json_response(text: str) -> dict | None:
    """Strip markdown fences if present and parse JSON."""
    text = text.strip()
    if text.startswith("```"):
        first_newline = text.find("\n")
        if first_newline != -1:
            inner = text[first_newline + 1:]
            close = inner.rfind("```")
            if close != -1:
                inner = inner[:close]
            text = inner.strip()
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass
    # Fallback: find first complete JSON object
    start, end = text.find("{"), text.rfind("}")
    if start != -1 and end != -1 and end > start:
        try:
            return json.loads(text[start:end + 1])
        except json.JSONDecodeError:
            pass
    return None


# Parse the raw reply; pretty-print the JSON on success, otherwise report the
# failure together with the unparsed text.
result = parse_json_response(raw)

if result is not None:
    print(json.dumps(result, indent=2))
else:
    print("Warning: model output could not be parsed as JSON.")
    print("Raw output:", raw)

Example Output

For example, given the small Python file below:

python
"""
Declare and configure the signals for the impress core application
"""

from functools import partial

from django.core.cache import cache
from django.db import transaction
from django.db.models import signals
from django.dispatch import receiver

from core import models
from core.tasks.search import trigger_batch_document_indexer
from core.utils.users import get_users_sharing_documents_with_cache_key


@receiver(signals.post_save, sender=models.Document)
def document_post_save(sender, instance, **kwargs):  # pylint: disable=unused-argument
    """
    Schedule trigger_batch_document_indexer(instance) to run on transaction commit.
    Note: within the transaction we can have an empty content and a serialization
    error.
    """
    transaction.on_commit(partial(trigger_batch_document_indexer, instance))


@receiver(signals.post_save, sender=models.DocumentAccess)
def document_access_post_save(sender, instance, created, **kwargs):  # pylint: disable=unused-argument
    """
    If the access was updated (not created), schedule the document indexer on commit.
    In every case, delete the cache entry keyed on the access's user, if it has one.
    """
    if not created:
        transaction.on_commit(
            partial(trigger_batch_document_indexer, instance.document)
        )

    # Invalidate cache for the user
    if instance.user:
        cache_key = get_users_sharing_documents_with_cache_key(instance.user)
        cache.delete(cache_key)


@receiver(signals.post_delete, sender=models.DocumentAccess)
def document_access_post_delete(sender, instance, **kwargs):  # pylint: disable=unused-argument
    """
    After a document access is deleted, delete the cache entry keyed on its user, if any.
    """
    if instance.user:
        cache_key = get_users_sharing_documents_with_cache_key(instance.user)
        cache.delete(cache_key)

Cassini-1 produces the following structured JSON:

json
{
  "imports": [
    {
      "module": "core",
      "names": [
        "models"
      ],
      "alias": {}
    },
    {
      "module": "core.tasks.search",
      "names": [
        "trigger_batch_document_indexer"
      ],
      "alias": {}
    },
    {
      "module": "core.utils.users",
      "names": [
        "get_users_sharing_documents_with_cache_key"
      ],
      "alias": {}
    }
  ],
  "references": [],
  "calls": [
    {
      "caller": "__module__",
      "callee_text": "receiver",
      "kind": "free",
      "receiver": null,
      "receiver_type_hint": null,
      "callee_file_hint": "external",
      "line": 14
    },
    {
      "caller": "document_post_save",
      "callee_text": "on_commit",
      "kind": "method",
      "receiver": "transaction",
      "receiver_type_hint": null,
      "callee_file_hint": "external",
      "line": 21
    },
    {
      "caller": "document_post_save",
      "callee_text": "partial",
      "kind": "free",
      "receiver": null,
      "receiver_type_hint": null,
      "callee_file_hint": "external",
      "line": 21
    },
    {
      "caller": "document_post_save",
      "callee_text": "trigger_batch_document_indexer",
      "kind": "hook",
      "receiver": null,
      "receiver_type_hint": null,
      "callee_file_hint": "core/tasks/search.py",
      "line": 21
    },
    {
      "caller": "__module__",
      "callee_text": "receiver",
      "kind": "free",
      "receiver": null,
      "receiver_type_hint": null,
      "callee_file_hint": "external",
      "line": 24
    },
    {
      "caller": "document_access_post_save",
      "callee_text": "on_commit",
      "kind": "method",
      "receiver": "transaction",
      "receiver_type_hint": null,
      "callee_file_hint": "external",
      "line": 31
    },
    {
      "caller": "document_access_post_save",
      "callee_text": "partial",
      "kind": "free",
      "receiver": null,
      "receiver_type_hint": null,
      "callee_file_hint": "external",
      "line": 32
    },
    {
      "caller": "document_access_post_save",
      "callee_text": "trigger_batch_document_indexer",
      "kind": "hook",
      "receiver": null,
      "receiver_type_hint": null,
      "callee_file_hint": "core/tasks/search.py",
      "line": 32
    },
    {
      "caller": "document_access_post_save",
      "callee_text": "get_users_sharing_documents_with_cache_key",
      "kind": "free",
      "receiver": null,
      "receiver_type_hint": null,
      "callee_file_hint": "core/utils/users.py",
      "line": 37
    },
    {
      "caller": "document_access_post_save",
      "callee_text": "delete",
      "kind": "method",
      "receiver": "cache",
      "receiver_type_hint": null,
      "callee_file_hint": "external",
      "line": 38
    },
    {
      "caller": "__module__",
      "callee_text": "receiver",
      "kind": "free",
      "receiver": null,
      "receiver_type_hint": null,
      "callee_file_hint": "external",
      "line": 41
    },
    {
      "caller": "document_access_post_delete",
      "callee_text": "get_users_sharing_documents_with_cache_key",
      "kind": "free",
      "receiver": null,
      "receiver_type_hint": null,
      "callee_file_hint": "core/utils/users.py",
      "line": 47
    },
    {
      "caller": "document_access_post_delete",
      "callee_text": "delete",
      "kind": "method",
      "receiver": "cache",
      "receiver_type_hint": null,
      "callee_file_hint": "external",
      "line": 48
    }
  ],
  "type_assignments": [
    {
      "scope": "document_access_post_save",
      "var": "cache_key",
      "type": null,
      "type_module": null,
      "from_call": "get_users_sharing_documents_with_cache_key"
    },
    {
      "scope": "document_access_post_delete",
      "var": "cache_key",
      "type": null,
      "type_module": null,
      "from_call": "get_users_sharing_documents_with_cache_key"
    }
  ],
  "definitions": [
    "document_post_save",
    "document_access_post_save",
    "document_access_post_delete"
  ],
  "definitions_rich": [
    {
      "name": "document_post_save",
      "kind": "function",
      "parent": null,
      "bases": [],
      "params": [
        {
          "name": "sender",
          "type": null
        },
        {
          "name": "instance",
          "type": null
        }
      ],
      "returns": null,
      "line": 15
    },
    {
      "name": "document_access_post_save",
      "kind": "function",
      "parent": null,
      "bases": [],
      "params": [
        {
          "name": "sender",
          "type": null
        },
        {
          "name": "instance",
          "type": null
        },
        {
          "name": "created",
          "type": null
        }
      ],
      "returns": null,
      "line": 25
    },
    {
      "name": "document_access_post_delete",
      "kind": "function",
      "parent": null,
      "bases": [],
      "params": [
        {
          "name": "sender",
          "type": null
        },
        {
          "name": "instance",
          "type": null
        }
      ],
      "returns": null,
      "line": 42
    }
  ]
}

Cassini-1

Get help setting up a custom Dedicated Endpoint.

README

Prerequisites

Minimal Inference

Example Output

Explore FriendliAI today

Cassini-1