pipe1os · John-1-1-1 · Jun 18, 2026 · Jun 18, 2026 · Jun 18, 2026 · Jun 19, 2026
diff --git a/.idea/.gitignore b/.idea/.gitignore
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
diff --git a/.idea/misc.xml b/.idea/misc.xml
diff --git a/.idea/modelinfo-cli.iml b/.idea/modelinfo-cli.iml
diff --git a/.idea/modules.xml b/.idea/modules.xml
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
diff --git a/README.md b/README.md
@@ -153,6 +153,7 @@ Qwen2.5-0.5B       494.0M    BF16     8K         1.6 GB      ✓
 | `--topology` | `--topology nvlink` | Set interconnect topology to calculate exact communication overhead penalties (`nvlink`, `pcie4`, `pcie3`). Defaults to `pcie4`. |
 | `--strategy` | `--strategy tp` | Selects the parallelization strategy for multi-GPU setups (`tp` for Tensor Parallelism, `pp` for Pipeline Parallelism). Defaults to `tp`. |
 | `--tensors` | `--tensors` | Bypasses the algorithmic speed estimation and forces the tool to fetch all remote shards, displaying an exact size breakdown of every tensor. |
+| `--timeout` | `--timeout 30` | Timeout, in seconds, for network requests to the Hugging Face Hub. Must be a positive, finite number. Defaults to `10.0`. |
 | `-v, --version` | `modelinfo -v` | Show program's version number and exit. |
 
 ## Architecture

diff --git a/src/modelinfo/cli.py b/src/modelinfo/cli.py
@@ -2,6 +2,7 @@
 import json
 import os
 import sys
+import math
 from typing import Sequence
 from modelinfo.architecture import identify_architecture_name
 from modelinfo.calculator import calculate_footprint
@@ -12,7 +13,13 @@
 
 
 class VersionAction(argparse.Action):
-    def __init__(self, option_strings, dest=argparse.SUPPRESS, default=argparse.SUPPRESS, help="show program's version number and exit"):
+    def __init__(
+        self,
+        option_strings,
+        dest=argparse.SUPPRESS,
+        default=argparse.SUPPRESS,
+        help="show program's version number and exit",
+    ):
         super().__init__(
             option_strings=option_strings,
             dest=dest,
@@ -41,12 +48,25 @@ def _positive_int(value: str) -> int:
     return ivalue
 
 
+def _positive_float(value: str) -> float:
+    """argparse ``type=`` callable: return *value* as a finite float > 0, else raise ArgumentTypeError."""
+    try:
+        fvalue = float(value)
+    except ValueError:
+        raise argparse.ArgumentTypeError(f"Invalid float value: {value}") from None
+    if not math.isfinite(fvalue):
+        raise argparse.ArgumentTypeError(f"Timeout must be a finite number: {value}")
+    if fvalue <= 0:
+        raise argparse.ArgumentTypeError(f"Timeout must be greater than 0: {value}")
+    return fvalue
+
+
 def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
     parser = argparse.ArgumentParser(
         prog="modelinfo",
         description="High-performance CLI utility to inspect ML model checkpoints and calculate VRAM requirements.",
     )
-    
+
     parser.add_argument(
         "file",
         type=str,
@@ -107,6 +127,12 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
         default=0.9,
         help="vLLM gpu_memory_utilization ratio (default 0.9). Reserves 10 percent for PyTorch context.",
     )
+    parser.add_argument(
+        "--timeout",
+        type=_positive_float,
+        default=10.0,
+        help="Network request timeout in seconds for Hugging Face Hub (default 10.0).",
+    )
     parser.add_argument(
         "-v",
         "--version",
@@ -117,49 +143,61 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
 
 
 def analyze_model(
-    file_path: str, 
-    context_override: int | None, 
+    file_path: str,
+    context_override: int | None,
     gpu_count: int = 1,
     batch_size: int = 1,
     fetch_tensors: bool = False,
     topology: str = "pcie4",
     strategy: str = "tp",
     is_vllm: bool = False,
     gpu_vram_gb: float = 0.0,
-    gpu_util: float = 0.9
+    gpu_util: float = 0.9,
+    timeout: float = 10.0,
 ) -> dict:
     tensors = {}
     config = None
     disk_size = 0.0
-    
+
     file_path_lower = file_path.lower()
-
-    if not os.path.exists(file_path) and not file_path_lower.endswith((".safetensors", ".gguf", ".pt", ".bin", ".index.json")):
+
+    if not os.path.exists(file_path) and not file_path_lower.endswith(
+        (".safetensors", ".gguf", ".pt", ".bin", ".index.json")
+    ):
         from modelinfo.parsers.huggingface import fetch_huggingface_repo
-        tensors, config, format_name, disk_size = fetch_huggingface_repo(file_path, fetch_tensors=fetch_tensors)
-    elif file_path_lower.endswith(".safetensors") or file_path_lower.endswith(".index.json"):
+
+        tensors, config, format_name, disk_size = fetch_huggingface_repo(
+            file_path, fetch_tensors=fetch_tensors, timeout=timeout
+        )
+    elif file_path_lower.endswith(".safetensors") or file_path_lower.endswith(
+        ".index.json"
+    ):
         tensors = parse_safetensors_header(file_path)
         format_name = "SafeTensors"
-        
+
         config_path = os.path.join(os.path.dirname(file_path), "config.json")
         if os.path.exists(config_path):
             try:
                 with open(config_path, "r", encoding="utf-8") as f:
                     config = json.load(f)
             except (json.JSONDecodeError, OSError):
                 pass
-                
+
     elif file_path_lower.endswith(".gguf"):
         tensors = parse_gguf_header(file_path)
         format_name = "GGUF"
     elif file_path_lower.endswith(".pt") or file_path_lower.endswith(".bin"):
         tensors = parse_pytorch_header(file_path)
         format_name = "PyTorch"
     elif os.path.isdir(file_path):
-        raise IsADirectoryError(f"'{file_path}' is a directory. Please provide the path to a specific weights file (e.g. .safetensors, .gguf, .pt) inside the directory.")
+        raise IsADirectoryError(
+            f"'{file_path}' is a directory. Please provide the path to a specific weights file (e.g. .safetensors, .gguf, .pt) inside the directory."
+        )
     else:
-        raise ValueError(f"File '{file_path}' not found locally and does not appear to be a Hugging Face repository ID.")
-
+        raise ValueError(
+            f"File '{file_path}' not found locally and does not appear to be a Hugging Face repository ID."
+        )
+
     max_context = None
     if config:
         max_context = config.get("max_position_embeddings")
@@ -168,15 +206,15 @@ def analyze_model(
         gen_arch = metadata.get("general.architecture")
         if gen_arch:
             max_context = metadata.get(f"{gen_arch}.context_length")
-            
+
     is_default_context = False
     context_length = context_override
     if context_length is None:
         context_length = min(8192, max_context) if max_context else 8192
         is_default_context = True
 
     footprint = calculate_footprint(
-        tensors, 
+        tensors,
         context_length=context_length,
         batch_size=batch_size,
         config=config,
@@ -185,16 +223,16 @@ def analyze_model(
         strategy=strategy,
         is_vllm=is_vllm,
         gpu_vram_bytes=gpu_vram_gb * 1024**3 if gpu_vram_gb else 0.0,
-        gpu_util=gpu_util
+        gpu_util=gpu_util,
     )
     num_layers = footprint["num_layers"]
     arch_name = identify_architecture_name(tensors, num_layers, config)
 
     if format_name != "SafeTensors" or os.path.exists(file_path):
         disk_size = os.path.getsize(file_path) if os.path.exists(file_path) else 0.0
-        
+
     tensor_count = len([k for k in tensors.keys() if k != "__metadata__"])
-    
+
     return {
         "format_name": format_name,
         "arch_name": arch_name,
@@ -211,7 +249,7 @@ def analyze_model(
         "strategy": strategy,
         "is_vllm": is_vllm,
         "gpu_vram_gb": gpu_vram_gb,
-        "gpu_util": gpu_util
+        "gpu_util": gpu_util,
     }
 
 
@@ -221,52 +259,65 @@ def main(argv: Sequence[str] | None = None) -> int:
     gpu_name_display = None
     gpu_vram_gb = None
     gpu_count = 1
-    
+
     if args.gpu or args.vllm:
         target = args.gpu if args.gpu else "auto"
         from modelinfo.hardware import resolve_gpu
+
         gpu_name_display, gpu_vram_gb, gpu_count = resolve_gpu(target)
 
     if len(args.file) > 1:
         if args.vllm:
-            console.print("[red]Error: Side-by-side comparison does not currently support the --vllm capacity simulation. Compare models sequentially or remove --vllm.[/red]")
+            console.print(
+                "[red]Error: Side-by-side comparison does not currently support the --vllm capacity simulation. Compare models sequentially or remove --vllm.[/red]"
+            )
             return 1
-            
+
         models = []
         for model_path in args.file:
             info = analyze_model(
-                model_path, 
-                args.context, 
+                model_path,
+                args.context,
                 gpu_count=gpu_count,
                 batch_size=args.batch_size,
                 fetch_tensors=args.tensors,
                 topology=args.topology,
                 strategy=args.strategy,
                 is_vllm=args.vllm,
                 gpu_vram_gb=gpu_vram_gb if gpu_vram_gb else 0.0,
-                gpu_util=args.gpu_util
+                gpu_util=args.gpu_util,
+                timeout=args.timeout,
             )
             models.append((model_path.split("/")[-1], info))
-
-        print_compare_info(models, gpu_vram_gb if gpu_vram_gb else args.max_vram, gpu_name=gpu_name_display)
+
+        print_compare_info(
+            models,
+            gpu_vram_gb if gpu_vram_gb else args.max_vram,
+            gpu_name=gpu_name_display,
+        )
         return 0
-        
+
     file_path = args.file[0]
-    
+
     info = analyze_model(
-        file_path, 
-        args.context, 
+        file_path,
+        args.context,
         gpu_count=gpu_count,
         batch_size=args.batch_size,
         fetch_tensors=args.tensors,
         topology=args.topology,
         strategy=args.strategy,
         is_vllm=args.vllm,
         gpu_vram_gb=gpu_vram_gb if gpu_vram_gb else 0.0,
-        gpu_util=args.gpu_util
+        gpu_util=args.gpu_util,
+        timeout=args.timeout,
     )
 
-    print_model_info(**info, max_vram_gb=gpu_vram_gb if gpu_vram_gb else args.max_vram, gpu_name=gpu_name_display)
+    print_model_info(
+        **info,
+        max_vram_gb=gpu_vram_gb if gpu_vram_gb else args.max_vram,
+        gpu_name=gpu_name_display,
+    )
     return 0