pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126

Note: If 404 error, proceed to Option B (source build).

3.4.1 Install Dependencies
pip install cmake ninja
pip install -r requirements.txt

3.4.2 Clone PyTorch
git clone --recursive https://github.com/pytorch/pytorch
cd pytorch
git checkout v2.5.0  # or newer stable tag
git submodule sync
git submodule update --init --recursive

3.4.3 Configure and Build
export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
export USE_CUDA=1
export CUDA_VERSION=12.6
export TORCH_CUDA_ARCH_LIST="8.0;8.6;8.9;9.0;10.0"  # adjust to your GPU
python setup.py develop

3.4.4 Install torchvision (Optional)
git clone https://github.com/pytorch/vision
cd vision
python setup.py develop

4. Verification

4.1 Basic CUDA Accessibility Test
import torch
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA version: {torch.version.cuda}")
print(f"cuDNN version: {torch.backends.cudnn.version()}")
print(f"GPU device: {torch.cuda.get_device_name(0)}")

4.2 Memory and Compute Test
x = torch.randn(10000, 10000).cuda()
y = torch.randn(10000, 10000).cuda()
z = torch.matmul(x, y)
print(f"Matrix multiplication result shape: {z.shape}")
print(f"Peak memory: {torch.cuda.max_memory_allocated() / 1e9:.2f} GB")

4.3 Performance Benchmark (Optional)
import time

def benchmark(device='cuda'):
    a = torch.randn(4096, 4096, device=device)
    b = torch.randn(4096, 4096, device=device)
    torch.cuda.synchronize()
    start = time.time()
    for _ in range(100):
        torch.mm(a, b)
    torch.cuda.synchronize()
    return (time.time() - start) / 100