Python SDK
The Proxle Python SDK provides drop-in replacements for popular LLM client libraries. Replace your import, add a proxy key, and all requests are automatically logged, cached, and tracked.
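For example, migrating an existing OpenAI integration is a one-line change plus the proxy key (a minimal sketch; only the import and the added proxy_key differ from stock usage):
# Before:
#   from openai import OpenAI
#   client = OpenAI(api_key="sk-...")
# After:
from proxle import OpenAI
client = OpenAI(api_key="sk-...", proxy_key="pk_live_...")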
Installation
pip install proxle
Requires Python 3.8+.
Configuration
Constructor Arguments
from proxle import OpenAI
client = OpenAI(
    api_key="sk-...",         # Your provider API key
    proxy_key="pk_live_...",  # Your Proxle API key
    proxy_url="https://...",  # Optional: custom proxy URL
)
Environment Variables
export PROXLE_API_KEY=pk_live_...
export PROXLE_URL=https://api.proxle.dev # optional
When these environment variables are set, you can omit proxy_key (and proxy_url) from the constructor.
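With PROXLE_API_KEY exported, the wrapper resolves the key automatically and only the provider credentials remain in code:
from proxle import OpenAI
# No proxy_key argument: picked up from PROXLE_API_KEY
client = OpenAI(api_key="sk-...")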
Provider Wrappers
OpenAI
from proxle import OpenAI
client = OpenAI(api_key="sk-...", proxy_key="pk_live_...")
# Chat completions
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello!"}],
    metadata={"feature": "chat", "user_id": "user_123"}
)
# Embeddings
embeddings = client.embeddings.create(
    model="text-embedding-3-small",
    input="Hello world"
)
Anthropic
from proxle import Anthropic
client = Anthropic(api_key="sk-ant-...", proxy_key="pk_live_...")
response = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    messages=[{"role": "user", "content": "Hello!"}],
    metadata={"feature": "assistant"}
)
Azure OpenAI
from proxle import AzureOpenAI
client = AzureOpenAI(
    api_key="...",
    azure_endpoint="https://your-resource.openai.azure.com",
    api_version="2024-02-01",
    proxy_key="pk_live_..."
)
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello!"}]
)
Cohere
from proxle import Cohere
client = Cohere(api_key="...", proxy_key="pk_live_...")
# Chat
response = client.chat(
    model="command-r-plus",
    message="Hello!",
    metadata={"feature": "chat"}
)
# Embeddings
response = client.embed(
    model="embed-english-v3.0",
    texts=["Hello world"],
    input_type="search_document"
)
Gemini
from proxle import Gemini
client = Gemini(api_key="...", proxy_key="pk_live_...")
response = client.generate_content(
    model="gemini-2.0-flash",
    contents=[{"role": "user", "parts": [{"text": "Hello!"}]}],
    metadata={"feature": "chat"}
)
Metadata
The metadata parameter lets you tag requests for cost attribution and filtering:
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[...],
    metadata={
        "feature": "chat_assistant",
        "user_id": "user_123",
        "environment": "production"
    }
)
Metadata is limited to 4KB when serialized to JSON.
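If you attach dynamic values, it can be worth guarding against that limit before sending. A sketch; check_metadata_size is a hypothetical helper, not part of the SDK:
import json

def check_metadata_size(metadata: dict) -> dict:
    # Metadata must stay under 4KB once serialized to JSON
    if len(json.dumps(metadata).encode("utf-8")) > 4096:
        raise ValueError("metadata exceeds the 4KB limit")
    return metadata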
Streaming
All providers support streaming:
# OpenAI streaming
stream = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Tell me a story"}],
    stream=True
)
for chunk in stream:
    # Some chunks may carry no choices or an empty delta, so guard both
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")
# Anthropic streaming
with client.messages.stream(
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    messages=[{"role": "user", "content": "Tell me a story"}]
) as stream:
    for text in stream.text_stream:
        print(text, end="")
Error Handling
from proxle import OpenAI
from proxle.errors import ProxleError, ProxleConfigError
try:
    client = OpenAI(api_key="sk-...", proxy_key="pk_live_...")
    response = client.chat.completions.create(...)
except ProxleConfigError as e:
    print(f"Configuration error: {e}")
except ProxleError as e:
    print(f"Proxle error: {e}")
Async Support
The OpenAI, Anthropic, and Azure OpenAI wrappers also ship async clients:
import asyncio
from proxle import AsyncOpenAI

async def main():
    client = AsyncOpenAI(api_key="sk-...", proxy_key="pk_live_...")
    response = await client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Hello!"}]
    )
    print(response.choices[0].message.content)

asyncio.run(main())
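Assuming the async wrappers mirror the upstream clients as the sync examples above do, streaming combines with async for. A sketch, run inside an async function such as main above:
stream = await client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Tell me a story"}],
    stream=True
)
async for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")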