Quick start

Get up and running with the Rust SDK in minutes. This guide shows two approaches: using gateway-native types and using OpenAI-compatible types.

Gateway-native types

Here's a minimal example to get started:

use vllora_llm::client::VlloraLLMClient;
use vllora_llm::types::gateway::{ChatCompletionRequest, ChatCompletionMessage};
use vllora_llm::error::LLMResult;

#[tokio::main]
async fn main() -> LLMResult<()> {
    // 1) Build a chat completion request using gateway-native types
    let request = ChatCompletionRequest {
        model: "gpt-4.1-mini".to_string(),
        messages: vec![
            ChatCompletionMessage::new_text(
                "system".to_string(),
                "You are a helpful assistant.".to_string(),
            ),
            ChatCompletionMessage::new_text(
                "user".to_string(),
                "Stream numbers 1 to 20 in separate lines.".to_string(),
            ),
        ],
        ..Default::default()
    };

    // 2) Construct a VlloraLLMClient
    let client = VlloraLLMClient::new();

    // 3) Non-streaming: send the request and get the final reply
    let response = client
        .completions()
        .create(request.clone())
        .await?;

    // ... handle response
    Ok(())
}
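
To print the model's reply, you can read the response the same way the OpenAI-compatible example below does. Here is a minimal sketch to drop in place of the // ... handle response placeholder, assuming the gateway-native response exposes the same message() and content accessors used later in this guide:

// Sketch: extract and print the assistant's text, if present.
if let Some(content) = &response.message().content {
    if let Some(text) = content.as_string() {
        println!("Reply:\n{text}");
    }
}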

Note: By default, VlloraLLMClient::new() fetches API keys from environment variables following the pattern VLLORA_{PROVIDER_NAME}_API_KEY. For example, for OpenAI, it will look for VLLORA_OPENAI_API_KEY.
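
If you prefer not to rely on the environment-variable lookup, you can also pass an API key to the client explicitly. Here is a minimal sketch using the with_credentials builder and credential types from the example below (the key string is a hypothetical placeholder):

use vllora_llm::client::VlloraLLMClient;
use vllora_llm::types::credentials::{ApiKeyCredentials, Credentials};

// Sketch: bypass the env-var lookup and supply an API key directly.
let client = VlloraLLMClient::new().with_credentials(Credentials::ApiKey(
    ApiKeyCredentials {
        api_key: "sk-your-openai-key".to_string(), // hypothetical placeholder
    },
));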

Quick start with async-openai-compatible types

If you already build OpenAI-compatible requests (e.g. with the async-openai types shown below), you can send both non-streaming and streaming completions through VlloraLLMClient.

use async_openai::types::{
    ChatCompletionRequestMessage,
    ChatCompletionRequestSystemMessageArgs,
    ChatCompletionRequestUserMessageArgs,
    CreateChatCompletionRequestArgs,
};
use tokio_stream::StreamExt;

use vllora_llm::client::VlloraLLMClient;
use vllora_llm::error::LLMResult;
use vllora_llm::types::credentials::{ApiKeyCredentials, Credentials};

#[tokio::main]
async fn main() -> LLMResult<()> {
    // 1) Build an OpenAI-style request using async-openai-compatible types
    let openai_req = CreateChatCompletionRequestArgs::default()
        .model("gpt-4.1-mini")
        .messages([
            ChatCompletionRequestMessage::System(
                ChatCompletionRequestSystemMessageArgs::default()
                    .content("You are a helpful assistant.")
                    .build()?,
            ),
            ChatCompletionRequestMessage::User(
                ChatCompletionRequestUserMessageArgs::default()
                    .content("Stream numbers 1 to 20 in separate lines.")
                    .build()?,
            ),
        ])
        .build()?;

    // 2) Construct a VlloraLLMClient (here: direct OpenAI key)
    let client = VlloraLLMClient::new().with_credentials(Credentials::ApiKey(
        ApiKeyCredentials {
            api_key: std::env::var("VLLORA_OPENAI_API_KEY")
                .expect("VLLORA_OPENAI_API_KEY must be set"),
        },
    ));

    // 3) Non-streaming: send the request and print the final reply
    let response = client
        .completions()
        .create(openai_req.clone())
        .await?;

    if let Some(content) = &response.message().content {
        if let Some(text) = content.as_string() {
            println!("Non-streaming reply:\n{text}");
        }
    }

    // 4) Streaming: send the same request and print chunks as they arrive
    let mut stream = client
        .completions()
        .create_stream(openai_req)
        .await?;

    while let Some(chunk) = stream.next().await {
        let chunk = chunk?;
        for choice in chunk.choices {
            if let Some(delta) = choice.delta.content {
                print!("{delta}");
            }
        }
    }

    Ok(())
}

What's next?