Skip to content
7 changes: 7 additions & 0 deletions langfuse/_client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,13 @@ class Langfuse:
host (Optional[str]): Deprecated. Use base_url instead. The Langfuse API host URL. Defaults to "https://cloud.langfuse.com".
timeout (Optional[int]): Timeout in seconds for API requests. Defaults to 5 seconds.
httpx_client (Optional[httpx.Client]): Custom httpx client for making non-tracing HTTP requests. If not provided, a default client will be created.
**Fork safety**: ``httpx.Client`` is thread-safe but not process-safe. When using
``fork()``-based servers (e.g. Gunicorn with ``--preload``), the SDK automatically
recreates its internally-managed HTTP client in child processes after fork. A custom
``httpx_client`` is intentionally left as-is (the fork-inherited copy is reused), so
you retain the opportunity to handle process-safety yourself — for example by
registering your own ``os.register_at_fork(after_in_child=...)`` handler to close and
reopen connections on the custom client.
debug (bool): Enable debug logging. Defaults to False. Can also be set via LANGFUSE_DEBUG environment variable.
tracing_enabled (Optional[bool]): Enable or disable tracing. Defaults to True. Can also be set via LANGFUSE_TRACING_ENABLED environment variable.
flush_at (Optional[int]): Number of spans to batch before sending to the API. Defaults to 512. Can also be set via LANGFUSE_FLUSH_AT environment variable.
Expand Down
200 changes: 178 additions & 22 deletions langfuse/_client/resource_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import atexit
import os
import threading
import weakref
from queue import Full, Queue
from typing import Any, Callable, Dict, List, Optional, cast

Expand Down Expand Up @@ -170,6 +171,7 @@ def _initialize_instance(
self.base_url = base_url
self.mask = mask
self.environment = environment
self._shutdown = False

# Store additional client settings for get_client() to use
self.timeout = timeout
Expand Down Expand Up @@ -217,6 +219,7 @@ def _initialize_instance(
## use connection pools with limited capacity. Creating multiple instances
## could exhaust the OS's maximum number of available TCP sockets (file descriptors),
## leading to connection errors.
self._custom_httpx_client = httpx_client
if httpx_client is not None:
self.httpx_client = httpx_client
else:
Expand All @@ -243,7 +246,9 @@ def _initialize_instance(
x_langfuse_public_key=self.public_key,
timeout=timeout,
)
score_ingestion_client = LangfuseClient(

# Store as instance variable so _at_fork_reinit can reuse without recreation
self._score_ingestion_client = LangfuseClient(
public_key=self.public_key,
secret_key=secret_key,
base_url=base_url,
Expand All @@ -257,6 +262,52 @@ def _initialize_instance(
LANGFUSE_MEDIA_UPLOAD_ENABLED, "True"
).lower() not in ("false", "0")

self._media_upload_thread_count = media_upload_thread_count or max(
int(os.getenv(LANGFUSE_MEDIA_UPLOAD_THREAD_COUNT, 1)), 1
)

self._init_consumer_threads()

# Prompt cache
self.prompt_cache = PromptCache()

# Register shutdown handler
atexit.register(self.shutdown)

# Register fork handler to reinitialize consumer threads in child process.
# When using Gunicorn with --preload, os.fork() copies memory but not threads
# (POSIX.1: https://pubs.opengroup.org/onlinepubs/9699919799/functions/fork.html).
# Without this, media upload and score ingestion threads are lost after fork,
# causing silent data loss.
#
# Note: LangfuseSpanProcessor (BatchSpanProcessor) already handles fork-safety
# for span export via its own os.register_at_fork. This handler covers the
# remaining background threads managed by LangfuseResourceManager.
#
# weakref.WeakMethod prevents os.register_at_fork from holding a permanent strong
# reference to this instance, which would block garbage collection.
# See: https://github.com/open-telemetry/opentelemetry-python/blob/main/opentelemetry-sdk/src/opentelemetry/sdk/_shared_internal/__init__.py
if hasattr(os, "register_at_fork"):
weak_reinit = weakref.WeakMethod(self._at_fork_reinit)
os.register_at_fork(
# Walrus operator resolves the weak reference once and stores it in
# a temporary variable before calling it. This avoids a TOCTOU window
# where GC could collect the referent between checking for None and
# invoking the method.
after_in_child=lambda: (m := weak_reinit()) and m()
)

langfuse_logger.info(
f"Startup: Langfuse tracer successfully initialized | "
f"public_key={self.public_key} | "
f"base_url={base_url} | "
f"environment={environment or 'default'} | "
f"sample_rate={sample_rate if sample_rate is not None else 1.0} | "
f"media_threads={self._media_upload_thread_count}"
)

def _init_consumer_threads(self) -> None:
"""Initialize media upload and score ingestion consumer threads."""
self._media_upload_queue: Queue[Any] = Queue(100_000)
self._media_manager = MediaManager(
api_client=self.api,
Expand All @@ -266,49 +317,150 @@ def _initialize_instance(
)
self._media_upload_consumers = []

media_upload_thread_count = media_upload_thread_count or max(
int(os.getenv(LANGFUSE_MEDIA_UPLOAD_THREAD_COUNT, 1)), 1
)

if self._media_upload_enabled:
for i in range(media_upload_thread_count):
for i in range(self._media_upload_thread_count):
media_upload_consumer = MediaUploadConsumer(
identifier=i,
media_manager=self._media_manager,
)
media_upload_consumer.start()
self._media_upload_consumers.append(media_upload_consumer)

# Prompt cache
self.prompt_cache = PromptCache()

# Score ingestion
self._score_ingestion_queue: Queue[Any] = Queue(100_000)
self._ingestion_consumers = []

ingestion_consumer = ScoreIngestionConsumer(
ingestion_queue=self._score_ingestion_queue,
identifier=0,
client=score_ingestion_client,
flush_at=flush_at,
flush_interval=flush_interval,
client=self._score_ingestion_client,
flush_at=self.flush_at,
flush_interval=self.flush_interval,
max_retries=3,
public_key=self.public_key,
)
ingestion_consumer.start()
self._ingestion_consumers.append(ingestion_consumer)

# Register shutdown handler
atexit.register(self.shutdown)
def _at_fork_reinit(self) -> None:
    """Mark that post-fork reinitialization is needed; do no heavy work here.

    Called automatically via os.register_at_fork() after fork().
    Necessary for Gunicorn --preload deployments where os.fork() is used:
    threads are not copied to child processes (POSIX standard), so without
    reinitialization, the child process has no consumer threads and all
    media upload and score ingestion events are silently lost.

    Note: LangfuseSpanProcessor (BatchSpanProcessor) handles span export
    fork-safety separately via its own os.register_at_fork handler.

    The class-level lock is always replaced, even if shutdown() was already
    called on this instance: a lock inherited in the "held" state can never
    be released in the child, regardless of this instance's lifecycle.
    Everything else (fresh queues, the reinit flag) is skipped after
    shutdown(), to avoid restarting threads on an intentionally torn-down
    manager.

    Heavy work (httpx.Client creation, thread spawning) is intentionally
    deferred to _ensure_post_fork_initialized(), called on first use.
    Doing that work here — inside the after_in_child handler — triggers
    SSL/TLS and Objective-C runtime calls that are unsafe in the narrow
    post-fork window and cause segfaults on macOS with gunicorn --preload.
    """
    # The class-level lock may have been held by a thread in the parent at fork time.
    # That thread does not exist in the child, so the lock can never be released and
    # any attempt to acquire it would deadlock. Replace it with a fresh lock first,
    # before the shutdown check, so a shut-down instance cannot leave it stuck.
    LangfuseResourceManager._lock = threading.RLock()

    if self._shutdown:
        return

    # Replace queues with fresh empty ones so flush() (e.g. via atexit) does not
    # block waiting for pre-fork items that no consumer will ever drain.
    # Queue() is pure Python — safe to call here.
    self._media_upload_queue = Queue(100_000)
    self._score_ingestion_queue = Queue(100_000)
    self._media_upload_consumers = []
    self._ingestion_consumers = []

    # Fresh lock to guard the one-time lazy reinit. Created before the flag is
    # raised so the flag is never observable without its guarding lock.
    self._post_fork_reinit_lock = threading.Lock()
    # Signal that HTTP clients and consumer threads need to be recreated on first use.
    self._needs_post_fork_reinit = True

def _ensure_post_fork_initialized(self) -> None:
    """Lazily recreate HTTP clients and consumer threads after fork.

    Called at the start of add_score_task() / add_trace_task() so that
    the first actual work in the child process triggers full reinitialization.
    The deferred approach avoids doing SSL/thread-creation work inside the
    after_in_child handler where it causes segfaults on macOS.

    The ``_needs_post_fork_reinit`` flag is read once without the lock (the
    cheap path when no fork happened; ``getattr`` covers instances on which
    the fork handler never ran and the attribute does not exist) and once
    more after acquiring ``_post_fork_reinit_lock``, so concurrent callers
    perform the reinitialization at most once.

    Reinitialization is best-effort: failures are logged, never raised, and
    the flag is cleared either way, so a failed attempt is not retried on
    every subsequent call. The success message is logged only when both the
    HTTP clients and the consumer threads were actually recreated.
    """
    if not getattr(self, "_needs_post_fork_reinit", False):
        return

    with self._post_fork_reinit_lock:
        if not self._needs_post_fork_reinit:
            return

        langfuse_logger.debug(
            f"[PID {os.getpid()}] Fork detected: reinitializing Langfuse HTTP clients and consumer threads."
        )

        # Queues are intentionally recreated here (not reused from _at_fork_reinit).
        # Items enqueued before fork belong to the parent and must not be processed
        # by every worker — that would duplicate uploads/scores across workers.
        #
        # Internally-managed httpx clients must also be recreated: fork() duplicates
        # the parent's connection pool (TCP socket file descriptors) into the child.
        # Both processes would then share the same underlying sockets, causing data
        # corruption and SSL/TLS state mismatch under concurrent use.
        #
        # Custom httpx clients provided by the caller are NOT recreated. The
        # fork-inherited copy is reused, giving the caller the opportunity to handle
        # process-safety themselves (e.g. via their own os.register_at_fork handler).
        clients_recreated = True
        try:
            if self._custom_httpx_client is None:
                client_headers = (
                    self.additional_headers if self.additional_headers else {}
                )
                self.httpx_client = httpx.Client(
                    timeout=self.timeout, headers=client_headers
                )

                self.api = LangfuseAPI(
                    base_url=self.base_url,
                    username=self.public_key,
                    password=self.secret_key,
                    x_langfuse_sdk_name="python",
                    x_langfuse_sdk_version=langfuse_version,
                    x_langfuse_public_key=self.public_key,
                    httpx_client=self.httpx_client,
                    timeout=self.timeout,
                )
                self._score_ingestion_client = LangfuseClient(
                    public_key=self.public_key,
                    secret_key=self.secret_key,
                    base_url=self.base_url,
                    version=langfuse_version,
                    timeout=self.timeout or 20,
                    session=self.httpx_client,
                )
        except Exception as e:
            clients_recreated = False
            langfuse_logger.error(
                f"[PID {os.getpid()}] Failed to recreate HTTP clients after fork: {e}. "
                f"Network requests may fail in this worker."
            )

        # Consumer threads are still started if client recreation failed, so the
        # worker keeps draining its queues on a best-effort basis.
        threads_restarted = True
        try:
            self._init_consumer_threads()
        except Exception as e:
            threads_restarted = False
            langfuse_logger.error(
                f"[PID {os.getpid()}] Failed to reinitialize consumer threads after fork: {e}. "
                f"Media upload and score ingestion will be unavailable in this worker."
            )

        # Clear the flag even on failure: retrying on every enqueue would repeat
        # the same error on each call.
        self._needs_post_fork_reinit = False

        # Only report success when it is true; the failure paths above have
        # already logged their own errors.
        if clients_recreated and threads_restarted:
            langfuse_logger.debug(
                f"[PID {os.getpid()}] Langfuse consumer threads reinitialized after fork"
            )

@classmethod
def reset(cls) -> None:
Expand All @@ -319,6 +471,7 @@ def reset(cls) -> None:
cls._instances.clear()

def add_score_task(self, event: dict, *, force_sample: bool = False) -> None:
self._ensure_post_fork_initialized()
try:
# Sample scores with the same sampler that is used for tracing
tracer_provider = cast(TracerProvider, otel_trace_api.get_tracer_provider())
Expand Down Expand Up @@ -367,6 +520,7 @@ def add_trace_task(
self,
event: dict,
) -> None:
self._ensure_post_fork_initialized()
try:
langfuse_logger.debug(
f"Trace: Enqueuing event type={event['type']} for trace_id={event['body'].id}"
Expand Down Expand Up @@ -449,6 +603,8 @@ def flush(self) -> None:
langfuse_logger.debug("Successfully flushed media upload queue")

def shutdown(self) -> None:
self._shutdown = True

# Unregister the atexit handler first
atexit.unregister(self.shutdown)

Expand Down
Loading