langfuse · pyg410 · May 19, 2026 · May 19, 2026 · May 29, 2026 · May 29, 2026
diff --git a/langfuse/_client/client.py b/langfuse/_client/client.py
@@ -164,6 +164,13 @@ class Langfuse:
         host (Optional[str]): Deprecated. Use base_url instead. The Langfuse API host URL. Defaults to "https://cloud.langfuse.com".
         timeout (Optional[int]): Timeout in seconds for API requests. Defaults to 5 seconds.
         httpx_client (Optional[httpx.Client]): Custom httpx client for making non-tracing HTTP requests. If not provided, a default client will be created.
+            **Fork safety**: ``httpx.Client`` is thread-safe but not process-safe. When using
+            ``fork()``-based servers (e.g. Gunicorn with ``--preload``), the SDK automatically
+            recreates its internally-managed HTTP client in child processes after fork. A custom
+            ``httpx_client`` is intentionally left as-is (the fork-inherited copy is reused), so
+            you retain the opportunity to handle process-safety yourself — for example by
+            registering your own ``os.register_at_fork(after_in_child=...)`` handler to close and
+            reopen connections on the custom client.
         debug (bool): Enable debug logging. Defaults to False. Can also be set via LANGFUSE_DEBUG environment variable.
         tracing_enabled (Optional[bool]): Enable or disable tracing. Defaults to True. Can also be set via LANGFUSE_TRACING_ENABLED environment variable.
         flush_at (Optional[int]): Number of spans to batch before sending to the API. Defaults to 512. Can also be set via LANGFUSE_FLUSH_AT environment variable.

diff --git a/langfuse/_client/resource_manager.py b/langfuse/_client/resource_manager.py
@@ -17,6 +17,7 @@
 import atexit
 import os
 import threading
+import weakref
 from queue import Full, Queue
 from typing import Any, Callable, Dict, List, Optional, cast
 
@@ -170,6 +171,7 @@ def _initialize_instance(
         self.base_url = base_url
         self.mask = mask
         self.environment = environment
+        self._shutdown = False
 
         # Store additional client settings for get_client() to use
         self.timeout = timeout
@@ -217,6 +219,7 @@ def _initialize_instance(
         ## use connection pools with limited capacity. Creating multiple instances
         ## could exhaust the OS's maximum number of available TCP sockets (file descriptors),
         ## leading to connection errors.
+        self._custom_httpx_client = httpx_client
         if httpx_client is not None:
             self.httpx_client = httpx_client
         else:
@@ -243,7 +246,9 @@ def _initialize_instance(
             x_langfuse_public_key=self.public_key,
             timeout=timeout,
         )
-        score_ingestion_client = LangfuseClient(
+
+        # Store on the instance so _init_consumer_threads can read it, including after the post-fork client rebuild
+        self._score_ingestion_client = LangfuseClient(
             public_key=self.public_key,
             secret_key=secret_key,
             base_url=base_url,
@@ -257,6 +262,52 @@ def _initialize_instance(
             LANGFUSE_MEDIA_UPLOAD_ENABLED, "True"
         ).lower() not in ("false", "0")
 
+        self._media_upload_thread_count = media_upload_thread_count or max(
+            int(os.getenv(LANGFUSE_MEDIA_UPLOAD_THREAD_COUNT, 1)), 1
+        )
+
+        self._init_consumer_threads()
+
+        # Prompt cache
+        self.prompt_cache = PromptCache()
+
+        # Register shutdown handler
+        atexit.register(self.shutdown)
+
+        # Register fork handler so consumer threads are reinitialized (lazily, on first use) in the child process.
+        # When using Gunicorn with --preload, os.fork() copies memory but not threads
+        # (POSIX.1: https://pubs.opengroup.org/onlinepubs/9699919799/functions/fork.html).
+        # Without this, media upload and score ingestion threads are lost after fork,
+        # causing silent data loss.
+        #
+        # Note: LangfuseSpanProcessor (BatchSpanProcessor) already handles fork-safety
+        # for span export via its own os.register_at_fork. This handler covers the
+        # remaining background threads managed by LangfuseResourceManager.
+        #
+        # weakref.WeakMethod prevents os.register_at_fork from holding a permanent strong
+        # reference to this instance, which would block garbage collection.
+        # See: https://github.com/open-telemetry/opentelemetry-python/blob/main/opentelemetry-sdk/src/opentelemetry/sdk/_shared_internal/__init__.py
+        if hasattr(os, "register_at_fork"):
+            weak_reinit = weakref.WeakMethod(self._at_fork_reinit)
+            os.register_at_fork(
+                # Walrus operator resolves the weak reference once and stores it in
+                # a temporary variable before calling it. This avoids a TOCTOU window
+                # where GC could collect the referent between checking for None and
+                # invoking the method.
+                after_in_child=lambda: (m := weak_reinit()) and m()
+            )
+
+        langfuse_logger.info(
+            f"Startup: Langfuse tracer successfully initialized | "
+            f"public_key={self.public_key} | "
+            f"base_url={base_url} | "
+            f"environment={environment or 'default'} | "
+            f"sample_rate={sample_rate if sample_rate is not None else 1.0} | "
+            f"media_threads={self._media_upload_thread_count}"
+        )
+
+    def _init_consumer_threads(self) -> None:
+        """Initialize media upload and score ingestion consumer threads."""
         self._media_upload_queue: Queue[Any] = Queue(100_000)
         self._media_manager = MediaManager(
             api_client=self.api,
@@ -266,49 +317,150 @@ def _initialize_instance(
         )
         self._media_upload_consumers = []
 
-        media_upload_thread_count = media_upload_thread_count or max(
-            int(os.getenv(LANGFUSE_MEDIA_UPLOAD_THREAD_COUNT, 1)), 1
-        )
-
         if self._media_upload_enabled:
-            for i in range(media_upload_thread_count):
+            for i in range(self._media_upload_thread_count):
                 media_upload_consumer = MediaUploadConsumer(
                     identifier=i,
                     media_manager=self._media_manager,
                 )
                 media_upload_consumer.start()
                 self._media_upload_consumers.append(media_upload_consumer)
 
-        # Prompt cache
-        self.prompt_cache = PromptCache()
-
         # Score ingestion
         self._score_ingestion_queue: Queue[Any] = Queue(100_000)
         self._ingestion_consumers = []
 
         ingestion_consumer = ScoreIngestionConsumer(
             ingestion_queue=self._score_ingestion_queue,
             identifier=0,
-            client=score_ingestion_client,
-            flush_at=flush_at,
-            flush_interval=flush_interval,
+            client=self._score_ingestion_client,
+            flush_at=self.flush_at,
+            flush_interval=self.flush_interval,
             max_retries=3,
             public_key=self.public_key,
         )
         ingestion_consumer.start()
         self._ingestion_consumers.append(ingestion_consumer)
 
-        # Register shutdown handler
-        atexit.register(self.shutdown)
+    def _at_fork_reinit(self) -> None:
+        """Mark that post-fork reinitialization is needed; do no heavy work here.
 
-        langfuse_logger.info(
-            f"Startup: Langfuse tracer successfully initialized | "
-            f"public_key={self.public_key} | "
-            f"base_url={base_url} | "
-            f"environment={environment or 'default'} | "
-            f"sample_rate={sample_rate if sample_rate is not None else 1.0} | "
-            f"media_threads={media_upload_thread_count or 1}"
-        )
+        Called automatically via os.register_at_fork() after fork().
+        Necessary for Gunicorn --preload deployments where os.fork() is used:
+        threads are not copied to child processes (POSIX standard), so without
+        reinitialization, the child process has no consumer threads and all
+        media upload and score ingestion events are silently lost.
+
+        Note: LangfuseSpanProcessor (BatchSpanProcessor) handles span export
+        fork-safety separately via its own os.register_at_fork handler.
+
+        Skipped if shutdown() was already called on this instance, to avoid
+        restarting threads on an intentionally torn-down manager.
+
+        Heavy work (httpx.Client creation, thread spawning) is intentionally
+        deferred to _ensure_post_fork_initialized(), called on first use.
+        Doing that work here — inside the after_in_child handler — triggers
+        SSL/TLS and Objective-C runtime calls that are unsafe in the narrow
+        post-fork window and cause segfaults on macOS with gunicorn --preload.
+        """
+        if self._shutdown:
+            return
+
+        # The class-level lock may have been held by a thread in the parent at fork time.
+        # That thread does not exist in the child, so the lock can never be released and
+        # any attempt to acquire it would deadlock. Replace it with a fresh lock first.
+        LangfuseResourceManager._lock = threading.RLock()
+
+        # Replace queues with fresh empty ones so flush() (e.g. via atexit) does not
+        # block waiting for pre-fork items that no consumer will ever drain.
+        # Queue() is pure Python — safe to call here.
+        self._media_upload_queue = Queue(100_000)
+        self._score_ingestion_queue = Queue(100_000)
+        self._media_upload_consumers = []
+        self._ingestion_consumers = []
+
+        # Signal that HTTP clients and consumer threads need to be recreated on first use.
+        self._needs_post_fork_reinit = True
+        # Fresh lock to guard the one-time lazy reinit below.
+        self._post_fork_reinit_lock = threading.Lock()
+
+    def _ensure_post_fork_initialized(self) -> None:
+        """Lazily recreate HTTP clients and consumer threads after fork.
+
+        Called at the start of add_score_task() / add_trace_task() so that
+        the first actual work in the child process triggers full reinitialization.
+        The deferred approach avoids doing SSL/thread-creation work inside the
+        after_in_child handler where it causes segfaults on macOS.
+        """
+        if not getattr(self, "_needs_post_fork_reinit", False):
+            return
+
+        with self._post_fork_reinit_lock:
+            if not self._needs_post_fork_reinit:
+                return
+
+            langfuse_logger.debug(
+                f"[PID {os.getpid()}] Fork detected: reinitializing Langfuse HTTP clients and consumer threads."
+            )
+
+            # Queues are intentionally recreated here (not reused from _at_fork_reinit).
+            # Items enqueued before fork belong to the parent and must not be processed
+            # by every worker — that would duplicate uploads/scores across workers.
+            #
+            # Internally-managed httpx clients must also be recreated: fork() duplicates
+            # the parent's connection pool (TCP socket file descriptors) into the child.
+            # Both processes would then share the same underlying sockets, causing data
+            # corruption and SSL/TLS state mismatch under concurrent use.
+            #
+            # Custom httpx clients provided by the caller are NOT recreated. The
+            # fork-inherited copy is reused, giving the caller the opportunity to handle
+            # process-safety themselves (e.g. via their own os.register_at_fork handler).
+            try:
+                if self._custom_httpx_client is None:
+                    client_headers = (
+                        self.additional_headers if self.additional_headers else {}
+                    )
+                    self.httpx_client = httpx.Client(
+                        timeout=self.timeout, headers=client_headers
+                    )
+
+                self.api = LangfuseAPI(
+                    base_url=self.base_url,
+                    username=self.public_key,
+                    password=self.secret_key,
+                    x_langfuse_sdk_name="python",
+                    x_langfuse_sdk_version=langfuse_version,
+                    x_langfuse_public_key=self.public_key,
+                    httpx_client=self.httpx_client,
+                    timeout=self.timeout,
+                )
+                self._score_ingestion_client = LangfuseClient(
+                    public_key=self.public_key,
+                    secret_key=self.secret_key,
+                    base_url=self.base_url,
+                    version=langfuse_version,
+                    timeout=self.timeout or 20,
+                    session=self.httpx_client,
+                )
+            except Exception as e:
+                langfuse_logger.error(
+                    f"[PID {os.getpid()}] Failed to recreate HTTP clients after fork: {e}. "
+                    f"Network requests may fail in this worker."
+                )
+
+            try:
+                self._init_consumer_threads()
+            except Exception as e:
+                langfuse_logger.error(
+                    f"[PID {os.getpid()}] Failed to reinitialize consumer threads after fork: {e}. "
+                    f"Media upload and score ingestion will be unavailable in this worker."
+                )
+
+            self._needs_post_fork_reinit = False
+
+            langfuse_logger.debug(
+                f"[PID {os.getpid()}] Langfuse consumer threads reinitialized after fork"
+            )
 
     @classmethod
     def reset(cls) -> None:
@@ -319,6 +471,7 @@ def reset(cls) -> None:
             cls._instances.clear()
 
     def add_score_task(self, event: dict, *, force_sample: bool = False) -> None:
+        self._ensure_post_fork_initialized()
         try:
             # Sample scores with the same sampler that is used for tracing
             tracer_provider = cast(TracerProvider, otel_trace_api.get_tracer_provider())
@@ -367,6 +520,7 @@ def add_trace_task(
         self,
         event: dict,
     ) -> None:
+        self._ensure_post_fork_initialized()
         try:
             langfuse_logger.debug(
                 f"Trace: Enqueuing event type={event['type']} for trace_id={event['body'].id}"
@@ -449,6 +603,8 @@ def flush(self) -> None:
         langfuse_logger.debug("Successfully flushed media upload queue")
 
     def shutdown(self) -> None:
+        self._shutdown = True
+
         # Unregister the atexit handler first
         atexit.unregister(self.shutdown)