From b734098496e20c0b1d87803a36871c0bed103cd2 Mon Sep 17 00:00:00 2001
From: Rupayon Haldar <80724680+rupayon123@users.noreply.github.com>
Date: Sat, 20 Jun 2026 06:11:58 -0400
Subject: [PATCH 1/3] Add Hugging Face timeout CLI option

---
 src/modelinfo/cli.py | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/modelinfo/cli.py b/src/modelinfo/cli.py
index 1d9da7a..09a8d8f 100644
--- a/src/modelinfo/cli.py
+++ b/src/modelinfo/cli.py
@@ -41,6 +41,13 @@ def _positive_int(value: str) -> int:
     return ivalue
 
 
+def _positive_float(value: str) -> float:  # argparse ``type=`` callback used by --timeout
+    fvalue = float(value)  # non-numeric text raises ValueError; argparse turns it into a usage error
+    if not 0 < fvalue < float("inf"):  # chained test is False for NaN, so NaN, inf and <= 0 all fail
+        raise argparse.ArgumentTypeError("timeout must be a finite number greater than 0")
+    return fvalue
+
+
 def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
     parser = argparse.ArgumentParser(
         prog="modelinfo",
@@ -82,6 +89,12 @@ def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
         action="store_true",
         help="Deep dive: Fetch all remote tensor shards to display the exact tensor size breakdown.",
     )
+    parser.add_argument(
+        "--timeout",
+        type=_positive_float,
+        default=10.0,
+        help="Network timeout in seconds for remote Hugging Face fetches.",
+    )
     parser.add_argument(
         "--topology",
         type=str,
@@ -122,6 +135,7 @@ def analyze_model(
     gpu_count: int = 1,
     batch_size: int = 1,
     fetch_tensors: bool = False,
+    timeout: float = 10.0,
     topology: str = "pcie4",
     strategy: str = "tp",
     is_vllm: bool = False,
@@ -136,7 +150,9 @@ def analyze_model(
     
     if not os.path.exists(file_path) and not file_path_lower.endswith((".safetensors", ".gguf", ".pt", ".bin", ".index.json")):
         from modelinfo.parsers.huggingface import fetch_huggingface_repo
-        tensors, config, format_name, disk_size = fetch_huggingface_repo(file_path, fetch_tensors=fetch_tensors)
+        tensors, config, format_name, disk_size = fetch_huggingface_repo(
+            file_path, fetch_tensors=fetch_tensors, timeout=timeout
+        )
     elif file_path_lower.endswith(".safetensors") or file_path_lower.endswith(".index.json"):
         tensors = parse_safetensors_header(file_path)
         format_name = "SafeTensors"
@@ -240,6 +256,7 @@ def main(argv: Sequence[str] | None = None) -> int:
                 gpu_count=gpu_count,
                 batch_size=args.batch_size,
                 fetch_tensors=args.tensors,
+                timeout=args.timeout,
                 topology=args.topology,
                 strategy=args.strategy,
                 is_vllm=args.vllm,
@@ -259,6 +276,7 @@ def main(argv: Sequence[str] | None = None) -> int:
         gpu_count=gpu_count,
         batch_size=args.batch_size,
         fetch_tensors=args.tensors,
+        timeout=args.timeout,
         topology=args.topology,
         strategy=args.strategy,
         is_vllm=args.vllm,

From eeacf57d3badc8c0054dcf33a95f6eec0751516d Mon Sep 17 00:00:00 2001
From: Rupayon Haldar <80724680+rupayon123@users.noreply.github.com>
Date: Sat, 20 Jun 2026 06:12:39 -0400
Subject: [PATCH 2/3] Thread timeout through Hugging Face fetches

---
 src/modelinfo/parsers/huggingface.py | 31 ++++++++++++++++------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/src/modelinfo/parsers/huggingface.py b/src/modelinfo/parsers/huggingface.py
index fe793c8..a070e41 100644
--- a/src/modelinfo/parsers/huggingface.py
+++ b/src/modelinfo/parsers/huggingface.py
@@ -47,7 +47,12 @@ def _get_hf_token() -> str | None:
             
     return None
 
-def _make_request(url: str, headers: Dict[str, str] = None, limit: int | None = None) -> bytes:
+def _make_request(
+    url: str,
+    headers: Dict[str, str] = None,
+    limit: int | None = None,
+    timeout: float = 10.0,
+) -> bytes:
     if headers is None:
         headers = {}
         
@@ -57,7 +62,7 @@ def _make_request(url: str, headers: Dict[str, str] = None, limit: int | None =
         
     req = urllib.request.Request(url, headers=headers)
     try:
-        with urllib.request.urlopen(req, timeout=10) as response:
+        with urllib.request.urlopen(req, timeout=timeout) as response:
             if limit is not None:
                 return response.read(limit)
             return response.read()
@@ -68,16 +73,16 @@ def _make_request(url: str, headers: Dict[str, str] = None, limit: int | None =
            raise FileNotFoundError(f"Could not find repository or file on Hugging Face (404 Not Found): {url}")
         raise
 
-def _fetch_safetensors_header(repo_id: str, filename: str) -> Dict[str, Any]:
+def _fetch_safetensors_header(repo_id: str, filename: str, timeout: float = 10.0) -> Dict[str, Any]:
     url = f"{_get_hf_endpoint()}/{repo_id}/resolve/main/{filename}"
     
     # 1. Fetch the first 500KB in a single roundtrip
     headers = {"Range": "bytes=0-500000"}
     try:
-        chunk = _make_request(url, headers=headers, limit=500000)
+        chunk = _make_request(url, headers=headers, limit=500000, timeout=timeout)
     except urllib.error.HTTPError as e:
         if e.code == 416: # Range Not Satisfiable (file is smaller than 500KB)
-            chunk = _make_request(url, limit=500000)
+            chunk = _make_request(url, limit=500000, timeout=timeout)
         else:
             raise
             
@@ -92,18 +97,18 @@ def _fetch_safetensors_header(repo_id: str, filename: str) -> Dict[str, Any]:
     else:
         # 3. Double-roundtrip only if the header is massive (>500KB)
         headers = {"Range": f"bytes=8-{8+header_size-1}"}
-        json_bytes = _make_request(url, headers=headers, limit=header_size)
+        json_bytes = _make_request(url, headers=headers, limit=header_size, timeout=timeout)
         
     return json.loads(json_bytes)
 
-def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False) -> Tuple[Dict[str, Any], Dict[str, Any] | None, str, float]:
+def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False, timeout: float = 10.0) -> Tuple[Dict[str, Any], Dict[str, Any] | None, str, float]:
     """
     Fetches the metadata directly from the Hugging Face Hub over the network.
     Returns: (tensors, config, format_name, disk_size)
     """
     api_url = f"{_get_hf_endpoint()}/api/models/{repo_id}"
     try:
-        api_data = json.loads(_make_request(api_url).decode("utf-8"))
+        api_data = json.loads(_make_request(api_url, timeout=timeout).decode("utf-8"))
     except urllib.error.HTTPError as e:
         if e.code == 401:
             raise PermissionError(f"Gated/Private Model (401 Unauthorized). Set the HF_TOKEN environment variable to access {repo_id}")
@@ -117,7 +122,7 @@ def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False) -> Tuple[D
     config = None
     if "config.json" in filenames:
         config_url = f"{_get_hf_endpoint()}/{repo_id}/resolve/main/config.json"
-        config = json.loads(_make_request(config_url).decode("utf-8"))
+        config = json.loads(_make_request(config_url, timeout=timeout).decode("utf-8"))
         
     tensors = {}
     total_size = 0.0
@@ -125,7 +130,7 @@ def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False) -> Tuple[D
     if "model.safetensors.index.json" in filenames:
         # Sharded SafeTensors
         index_url = f"{_get_hf_endpoint()}/{repo_id}/resolve/main/model.safetensors.index.json"
-        index_data = json.loads(_make_request(index_url).decode("utf-8"))
+        index_data = json.loads(_make_request(index_url, timeout=timeout).decode("utf-8"))
         
         weight_map = index_data.get("weight_map", {})
         unique_shards = list(set(weight_map.values()))
@@ -146,7 +151,7 @@ def fetch_huggingface_repo(repo_id: str, fetch_tensors: bool = False) -> Tuple[D
             }
         else:
             def fetch_shard(shard: str):
-                return shard, _fetch_safetensors_header(repo_id, shard)
+                return shard, _fetch_safetensors_header(repo_id, shard, timeout=timeout)
                 
             with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, min(8, len(unique_shards)))) as executor:
                 future_to_shard = {executor.submit(fetch_shard, shard): shard for shard in unique_shards}
@@ -172,12 +177,12 @@ def fetch_shard(shard: str):
         if token:
             req.add_header("Authorization", f"Bearer {token}")
         try:
-            with urllib.request.urlopen(req) as response:
+            with urllib.request.urlopen(req, timeout=timeout) as response:
                 total_size = int(response.headers.get("Content-Length", 0))
         except Exception:
             pass
 
-        header = _fetch_safetensors_header(repo_id, "model.safetensors")
+        header = _fetch_safetensors_header(repo_id, "model.safetensors", timeout=timeout)
         tensors = header
             
         format_name = "SafeTensors"

From bd6ce163d5d6b5e4155ecf45c560fd28fa958d24 Mon Sep 17 00:00:00 2001
From: Rupayon Haldar <80724680+rupayon123@users.noreply.github.com>
Date: Sat, 20 Jun 2026 06:13:11 -0400
Subject: [PATCH 3/3] Add CLI timeout tests

---
 tests/test_cli.py | 86 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)

diff --git a/tests/test_cli.py b/tests/test_cli.py
index e5f9c02..4572435 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -39,6 +39,32 @@ def test_batch_size_flag_rejects_negative():
     assert exc_info.value.code == 2
 
 
+def test_timeout_flag_defaults_to_ten_seconds():
+    """Omitting --timeout leaves the parsed value at 10.0."""
+    namespace = parse_args(["model.gguf"])
+    assert namespace.timeout == 10.0
+
+
+def test_timeout_flag_accepts_float():
+    """A fractional --timeout value is parsed into the matching float."""
+    namespace = parse_args(["--timeout", "30.5", "model.gguf"])
+    assert namespace.timeout == 30.5
+
+
+def test_timeout_flag_rejects_zero():
+    """A --timeout of 0 must make parse_args exit with status 2."""
+    with pytest.raises(SystemExit) as raised:
+        parse_args(["--timeout", "0", "model.gguf"])
+    assert raised.value.code == 2
+
+
+def test_timeout_flag_rejects_negative():
+    """A negative --timeout must make parse_args exit with status 2."""
+    with pytest.raises(SystemExit) as raised:
+        parse_args(["--timeout", "-1", "model.gguf"])
+    assert raised.value.code == 2
+
+
 def test_analyze_model_passes_batch_size_to_footprint(monkeypatch, tmp_path):
     model_path = tmp_path / "model.gguf"
     model_path.write_bytes(b"mock")
@@ -77,3 +103,63 @@ def fake_calculate_footprint(tensors, *, context_length, batch_size, **kwargs):
 
     assert captured == {"batch_size": 4, "context_length": 128}
     assert info["footprint"]["kv_cache_bytes"] == 4.0
+
+
+def test_analyze_model_passes_timeout_to_huggingface(monkeypatch):
+    """analyze_model must forward its ``timeout`` to fetch_huggingface_repo."""
+    from modelinfo.parsers import huggingface
+
+    seen = {}
+
+    def never_exists(path):
+        return False
+
+    def stub_fetch(repo_id, *, fetch_tensors, timeout):
+        # Record every argument so one assertion at the end can check them all.
+        seen.update(
+            {"repo_id": repo_id, "fetch_tensors": fetch_tensors, "timeout": timeout}
+        )
+        k_proj = {"shape": [1, 1], "dtype": "F16"}
+        tensors = {"model.layers.0.self_attn.k_proj.weight": k_proj}
+        # Same 4-tuple that analyze_model unpacks from the fetcher's return value.
+        return tensors, None, "SafeTensors", 7.0
+
+    def stub_footprint(tensors, *, context_length, batch_size, **kwargs):
+        # Canned result standing in for cli.calculate_footprint.
+        report = {
+            "total_params": 1,
+            "base_memory_bytes": 2.0,
+            "kv_cache_bytes": 1.0,
+            "overhead_bytes": 0.0,
+            "total_memory_bytes": 3.0,
+            "num_layers": 1,
+            "kv_dim": 1,
+            "primary_dtype": "F16",
+            "kv_is_estimate": False,
+            "penalty_percentage": 0.0,
+            "vllm_metrics": {},
+        }
+        return report
+
+    def stub_arch_name(tensors, num_layers, config):
+        return "Mock"
+
+    # A path that "does not exist" and has no model suffix takes the Hub branch.
+    monkeypatch.setattr(cli.os.path, "exists", never_exists)
+    monkeypatch.setattr(huggingface, "fetch_huggingface_repo", stub_fetch)
+    monkeypatch.setattr(cli, "calculate_footprint", stub_footprint)
+    monkeypatch.setattr(cli, "identify_architecture_name", stub_arch_name)
+
+    cli.analyze_model(
+        "org/model",
+        context_override=128,
+        fetch_tensors=True,
+        timeout=22.5,
+    )
+
+    expected = {
+        "repo_id": "org/model",
+        "fetch_tensors": True,
+        "timeout": 22.5,
+    }
+    assert seen == expected