OpenAI Example (async-openai compatible)
Send both non-streaming and streaming OpenAI-style chat completions through VlloraLLMClient using async_openai_compat request types.
use async_openai_compat::types::{
    ChatCompletionRequestMessage,
    ChatCompletionRequestSystemMessageArgs,
    ChatCompletionRequestUserMessageArgs,
    CreateChatCompletionRequestArgs,
};
use tokio_stream::StreamExt;
use vllora_llm::client::VlloraLLMClient;
use vllora_llm::error::LLMResult;
#[tokio::main]
async fn main() -> LLMResult<()> {
    // 1) Build an OpenAI-style request using async-openai-compatible types
    let openai_req = CreateChatCompletionRequestArgs::default()
        .model("gpt-4.1-mini")
        .messages([
            ChatCompletionRequestMessage::System(
                ChatCompletionRequestSystemMessageArgs::default()
                    .content("You are a helpful assistant.")
                    .build()?,
            ),
            ChatCompletionRequestMessage::User(
                ChatCompletionRequestUserMessageArgs::default()
                    .content("Stream numbers 1 to 20 in separate lines.")
                    .build()?,
            ),
        ])
        .build()?;

    // 2) Construct a VlloraLLMClient
    let client = VlloraLLMClient::new();

    // 3) Non-streaming: send the request and print the final reply
    let response = client
        .completions()
        .create(openai_req.clone())
        .await?;
    if let Some(content) = &response.message().content {
        if let Some(text) = content.as_string() {
            println!("Non-streaming reply:");
            println!("{text}");
        }
    }

    // 4) Streaming: send the same request and print chunks as they arrive
    let mut stream = client
        .completions()
        .create_stream(openai_req)
        .await?;
    println!("Streaming response...");
    while let Some(chunk) = stream.next().await {
        let chunk = chunk?;
        for choice in chunk.choices {
            if let Some(delta) = choice.delta.content {
                print!("{delta}");
            }
        }
    }

    Ok(())
}
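If you need the full streamed reply as a single value (for logging or tests, say), the same loop can accumulate the deltas instead of printing them. The following is a minimal sketch that reuses only the calls shown above (create_stream, chunk.choices, choice.delta.content) and assumes the same client and openai_req as in the example.

// A sketch: collect streamed deltas into one String instead of printing them.
// Assumes `client` and `openai_req` are built exactly as in the example above.
let mut stream = client
    .completions()
    .create_stream(openai_req.clone())
    .await?;

let mut full_reply = String::new();
while let Some(chunk) = stream.next().await {
    let chunk = chunk?;
    for choice in chunk.choices {
        if let Some(delta) = choice.delta.content {
            // Append each delta in arrival order to rebuild the complete message.
            full_reply.push_str(&delta);
        }
    }
}
println!("Collected reply: {full_reply}");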
Run the example from llm/examples/openai after exporting the VLLORA_OPENAI_API_KEY environment variable.
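To fail fast with a clearer message when the key is missing, you can check the environment variable yourself before constructing the client. This is a small sketch using only std::env and the VLLORA_OPENAI_API_KEY name mentioned above; that VlloraLLMClient reads its key from this variable is an assumption, not something the example shows.

// A sketch: verify VLLORA_OPENAI_API_KEY is set before building the client,
// assuming VlloraLLMClient picks up the key from this environment variable.
if std::env::var("VLLORA_OPENAI_API_KEY").is_err() {
    eprintln!("VLLORA_OPENAI_API_KEY is not set; export it before running the example.");
    std::process::exit(1);
}
let client = VlloraLLMClient::new();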