#!/usr/bin/env python3
"""VMware vSphere collector for NetBox via Diode SDK.

Discovers ESXi hosts, VMs, interfaces, IPs, and disks from a vCenter or
standalone ESXi host and ingests them into NetBox via the Diode pipeline.

Usage:
    python collectors/vmware_collector.py --dry-run
    python collectors/vmware_collector.py
"""

import argparse
import atexit
import logging
import os
import re
import ssl
import sys

from pyVim.connect import SmartConnect, Disconnect
from pyVmomi import vim, vmodl

from netboxlabs.diode.sdk import DiodeClient, DiodeDryRunClient
from netboxlabs.diode.sdk.ingester import (
    Cluster,
    ClusterGroup,
    ClusterType,
    Device,
    DeviceRole,
    DeviceType,
    Entity,
    Interface,
    IPAddress,
    Manufacturer,
    Platform,
    Site,
    VirtualDisk,
    VirtualMachine,
    VMInterface,
)

log = logging.getLogger("vmware-collector")

# ---------------------------------------------------------------------------
# Status mappings
# ---------------------------------------------------------------------------

# vSphere VM power state -> status string sent with the VirtualMachine entity.
VM_POWER_STATE_MAP = {
    "poweredOn": "active",
    "poweredOff": "offline",
    "suspended": "offline",
}

# vSphere host overallStatus colour -> status string sent with the Device entity.
HOST_STATUS_MAP = {
    "green": "active",
    "yellow": "active",
    "red": "failed",
    "gray": "planned",
}

# Physical NIC link speed in Mbps -> interface type slug.
SPEED_TO_TYPE = {
    100: "100base-tx",
    1000: "1000base-t",
    2500: "2.5gbase-t",
    10000: "10gbase-x-sfpp",
    25000: "25gbase-x-sfp28",
    40000: "40gbase-x-qsfpp",
    100000: "100gbase-x-qsfp28",
}


# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

def load_dotenv(path: str = ".env") -> None:
    """Load ``KEY=VALUE`` pairs from *path* into ``os.environ``.

    Variables that are already set in the environment are left untouched.
    Blank lines, ``#`` comment lines, and lines without ``=`` are skipped.
    An optional leading ``export `` is accepted, and surrounding quote
    characters are stripped from the value. A missing file is not an error.
    """
    if not os.path.isfile(path):
        return
    # Explicit encoding so parsing does not depend on the process locale.
    with open(path, encoding="utf-8") as fh:
        for line in fh:
            line = line.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            key, _, val = line.partition("=")
            key = key.strip().removeprefix("export ").strip()
            if not key:
                # A line such as "=value" has no usable variable name.
                continue
            os.environ.setdefault(key, val.strip().strip("\"'"))


def get_config() -> dict:
    """Read the collector settings from ``VCENTER_*`` environment variables.

    Returns:
        A dict with the keys ``host``, ``user``, ``password``, ``port``
        (int), ``verify_ssl`` (bool) and ``site``.

    Raises:
        ValueError: if ``VCENTER_PORT`` is set to something that is not an
            integer. A blank value falls back to the default of 443.
    """
    raw_port = os.environ.get("VCENTER_PORT", "").strip() or "443"
    try:
        port = int(raw_port)
    except ValueError:
        raise ValueError(
            f"VCENTER_PORT must be an integer, got {raw_port!r}"
        ) from None

    verify_raw = os.environ.get("VCENTER_VERIFY_SSL", "false").strip().lower()

    return {
        "host": os.environ.get("VCENTER_HOST", ""),
        "user": os.environ.get("VCENTER_USER", "administrator@vsphere.local"),
        "password": os.environ.get("VCENTER_PASSWORD", ""),
        "port": port,
        # "true" keeps working; the other common truthy spellings are accepted too.
        "verify_ssl": verify_raw in ("true", "1", "yes", "on"),
        "site": os.environ.get("VCENTER_SITE", "main"),
    }


# ---------------------------------------------------------------------------
# Reference helpers
# ---------------------------------------------------------------------------

def _device_ref(name: str, model: str, manufacturer: str,
                role: str, site_name: str) -> Device:
    """Build a Device carrying only name, type, role and site.

    Used as the ``device`` reference on Interface entities.
    """
    return Device(
        name=name,
        device_type=DeviceType(
            model=model,
            manufacturer=Manufacturer(name=manufacturer),
        ),
        role=DeviceRole(name=role),
        site=Site(name=site_name),
    )


def _vm_ref(name: str, cluster_name: str, site_name: str,
            role: str = "Virtual Machine") -> VirtualMachine:
    """Build a VirtualMachine carrying only name, site, cluster and role.

    Used as the ``virtual_machine`` reference on VMInterface and
    VirtualDisk entities.
    """
    return VirtualMachine(
        name=name,
        site=Site(name=site_name),
        cluster=Cluster(
            name=cluster_name,
            type=ClusterType(name="VMware ESXi"),
            scope_site=Site(name=site_name),
        ),
        role=DeviceRole(name=role),
    )


# ---------------------------------------------------------------------------
# vSphere connection
# ---------------------------------------------------------------------------

def connect_vsphere(cfg: dict):
    """Connect to vCenter/ESXi and return ServiceInstance.

    Reads ``host``, ``user``, ``password``, ``port`` and ``verify_ssl`` from
    *cfg*. Exits the process with status 1 when no host is configured. The
    session is disconnected automatically at interpreter exit.
    """
    host = cfg["host"]
    if not host:
        log.error("VCENTER_HOST not set")
        sys.exit(1)

    context = None
    if not cfg["verify_ssl"]:
        # Verification explicitly disabled: accept any certificate.
        # check_hostname must be cleared before verify_mode can be CERT_NONE.
        context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
        context.check_hostname = False
        context.verify_mode = ssl.CERT_NONE

    si = SmartConnect(
        host=host,
        user=cfg["user"],
        pwd=cfg["password"],
        port=cfg["port"],
        sslContext=context,
    )
    atexit.register(Disconnect, si)
    log.info("Connected to vSphere: %s", host)
    return si


def get_all_objects(si, obj_type, folder=None):
    """Get all managed objects of a given type.

    Args:
        si: Connected ServiceInstance.
        obj_type: Managed object class to collect (e.g. ``vim.HostSystem``).
        folder: Container to search recursively; defaults to the root folder.

    Returns:
        A list of the matching managed objects.
    """
    content = si.RetrieveContent()
    container = content.viewManager.CreateContainerView(
        folder or content.rootFolder, [obj_type], True
    )
    try:
        return list(container.view)
    finally:
        # Destroy the view even if reading it fails, so it is not left behind.
        container.Destroy()
# ---------------------------------------------------------------------------
# Entity builders
# ---------------------------------------------------------------------------

def build_cluster_entities(si, site_name: str) -> list[Entity]:
    """Build Cluster entities from vSphere clusters.

    Each cluster is placed in a ClusterGroup named after the datacenter found
    by walking up its parent chain; clusters without one get no group.
    """
    entities = []
    for cluster in get_all_objects(si, vim.ClusterComputeResource):
        dc_name = ""
        parent = cluster.parent
        while parent:
            if isinstance(parent, vim.Datacenter):
                dc_name = parent.name
                break
            parent = getattr(parent, "parent", None)

        entities.append(Entity(cluster=Cluster(
            name=cluster.name,
            type=ClusterType(name="VMware ESXi"),
            scope_site=Site(name=site_name),
            group=ClusterGroup(name=dc_name) if dc_name else None,
            status="active",
            tags=["vmware"],
        )))
        log.info(" Cluster: %s (DC: %s)", cluster.name, dc_name or "none")

    return entities


def _host_serial(sys_info) -> str:
    """Return the host serial: the "ServiceTag" identifier, else serialNumber."""
    if not sys_info:
        return ""
    serial = ""
    for ident in (sys_info.otherIdentifyingInfo or []):
        ident_type = getattr(ident, "identifierType", None)
        if ident_type and ident_type.key == "ServiceTag":
            serial = ident.identifierValue or ""
            break
    serial = serial or getattr(sys_info, "serialNumber", "") or ""
    return str(serial).strip()


def build_host_entities(si, site_name: str) -> tuple[list[Entity], dict]:
    """Build Device entities from ESXi hosts.

    For every host this emits the Device itself, one Interface per physical
    NIC, one virtual Interface per VMkernel adapter and an IPAddress for each
    VMkernel adapter that has an address configured.

    Returns:
        ``(entities, host_cluster_map)`` where ``host_cluster_map`` maps each
        host name to its cluster name ("" for hosts outside a cluster).
    """
    entities = []
    host_cluster_map = {}

    for host in get_all_objects(si, vim.HostSystem):
        hostname = host.name
        hw = host.hardware
        sys_info = hw.systemInfo if hw else None
        # Fall back to "Unknown" both when the info block is missing and when
        # the individual field is empty.
        model = (sys_info.model if sys_info else None) or "Unknown"
        vendor = (sys_info.vendor if sys_info else None) or "Unknown"
        serial = _host_serial(sys_info)

        status = HOST_STATUS_MAP.get(
            str(host.overallStatus) if host.overallStatus else "gray", "active"
        )

        # Determine cluster
        cluster_name = ""
        if isinstance(host.parent, vim.ClusterComputeResource):
            cluster_name = host.parent.name
        host_cluster_map[hostname] = cluster_name

        entities.append(Entity(device=Device(
            name=hostname,
            device_type=DeviceType(
                model=model,
                manufacturer=Manufacturer(name=vendor),
            ),
            role=DeviceRole(name="Hypervisor"),
            platform=Platform(name="VMware ESXi"),
            site=Site(name=site_name),
            serial=serial[:50] if serial else "",
            status=status,
            tags=["vmware"],
        )))

        dev_ref = _device_ref(hostname, model, vendor, "Hypervisor", site_name)
        network = host.config.network if host.config else None
        if network:
            # Physical NICs
            for pnic in (network.pnic or []):
                speed_mbps = pnic.linkSpeed.speedMb if pnic.linkSpeed else 0
                entities.append(Entity(interface=Interface(
                    device=dev_ref,
                    name=pnic.device,
                    type=SPEED_TO_TYPE.get(speed_mbps, "1000base-t"),
                    mac_address=pnic.mac or "",
                    # Link speed is reported in Mbps; it is sent multiplied by 1000.
                    speed=speed_mbps * 1000 if speed_mbps else 0,
                    enabled=True,
                    tags=["vmware"],
                )))

            # VMkernel interfaces
            for vnic in (network.vnic or []):
                spec = vnic.spec
                ip_cfg = spec.ip if spec else None
                ip_str = ""
                if ip_cfg and ip_cfg.ipAddress:
                    mask = ip_cfg.subnetMask
                    prefix_len = _mask_to_prefix(mask) if mask else 24
                    ip_str = f"{ip_cfg.ipAddress}/{prefix_len}"

                entities.append(Entity(interface=Interface(
                    device=dev_ref,
                    name=vnic.device,
                    type="virtual",
                    # The MAC may be unset on the spec; never pass None through.
                    mac_address=(spec.mac if spec else "") or "",
                    enabled=True,
                    tags=["vmware", "vmkernel"],
                )))

                if ip_str:
                    entities.append(Entity(ip_address=IPAddress(
                        address=ip_str,
                        status="active",
                        assigned_object_interface=Interface(
                            device=dev_ref,
                            name=vnic.device,
                            type="virtual",
                        ),
                        tags=["vmware"],
                    )))

        log.info(" Host: %s (%s %s, cluster=%s)",
                 hostname, vendor, model, cluster_name or "standalone")

    return entities, host_cluster_map


def _guest_platform(guest_os: str):
    """Map a guest OS description to "Linux", "Windows" or None."""
    lowered = guest_os.lower()
    if any(token in lowered for token in ("linux", "ubuntu", "centos", "debian")):
        return "Linux"
    if "windows" in lowered:
        return "Windows"
    return None


def _device_label(device, prefix: str) -> str:
    """Return the device's display label, or ``<prefix><key>`` if it has none."""
    return device.deviceInfo.label if device.deviceInfo else f"{prefix}{device.key}"


def _entities_for_vm(vm_obj, vm_name: str, site_name: str,
                     host_cluster_map: dict) -> list:
    """Build every entity for one VM; returns [] for VMs without a config.

    Entities are collected in a local list so that a failure half-way through
    never leaves a partially described VM in the caller's result.
    """
    config = vm_obj.config
    if not config:
        log.debug(" Skipping VM with no config: %s", vm_name)
        return []

    entities = []

    # Determine cluster from host; standalone hosts act as their own cluster.
    runtime = vm_obj.runtime
    host_name = ""
    cluster_name = ""
    if runtime and runtime.host:
        host_name = runtime.host.name
        cluster_name = host_cluster_map.get(host_name, "")
    if not cluster_name:
        cluster_name = host_name or "standalone"

    power_state = str(runtime.powerState) if runtime else "poweredOff"
    status = VM_POWER_STATE_MAP.get(power_state, "offline")

    # Resources
    hardware = config.hardware
    vcpus = hardware.numCPU if hardware else 0
    memory_mb = hardware.memoryMB if hardware else 0
    devices = (hardware.device if hardware else None) or []

    guest_os = config.guestFullName or config.guestId or ""
    platform_name = _guest_platform(guest_os)

    # Index the virtual NICs first so guest-reported addresses can be attached
    # to the same interface names that are emitted as VMInterface entities.
    nic_by_key = {}
    nic_by_mac = {}
    for device in devices:
        if isinstance(device, vim.vm.device.VirtualEthernetCard):
            label = _device_label(device, "nic")
            nic_by_key[device.key] = label
            mac = (getattr(device, "macAddress", "") or "").lower()
            if mac:
                nic_by_mac[mac] = label

    # Guest NIC info (requires VMware Tools)
    primary_ip4 = None
    vm_ips = []
    guest = vm_obj.guest
    for guest_nic in ((guest.net if guest else None) or []):
        ip_config = guest_nic.ipConfig
        if not ip_config:
            continue
        # Resolve the guest NIC to its virtual device: by deviceConfigId
        # (expected to equal the virtual device key; confirm against the
        # vSphere API reference), then by MAC. Fall back to the network name
        # when no virtual NIC matches.
        nic_name = (
            nic_by_key.get(getattr(guest_nic, "deviceConfigId", None))
            or nic_by_mac.get((getattr(guest_nic, "macAddress", "") or "").lower())
            or guest_nic.network
            or "eth0"
        )
        for ip_entry in (ip_config.ipAddress or []):
            addr = ip_entry.ipAddress
            if not addr:
                continue
            lowered = addr.lower()
            # Skip link-local IPv6 and loopback addresses.
            if lowered.startswith(("fe80", "127.")) or lowered == "::1":
                continue
            ip_str = f"{addr}/{ip_entry.prefixLength}"
            vm_ips.append((ip_str, nic_name))
            if not primary_ip4 and ":" not in addr:
                primary_ip4 = ip_str

    # VirtualMachine entity
    vm_kwargs = dict(
        name=vm_name,
        status=status,
        site=Site(name=site_name),
        cluster=Cluster(
            name=cluster_name,
            type=ClusterType(name="VMware ESXi"),
            scope_site=Site(name=site_name),
        ),
        role=DeviceRole(name="Virtual Machine"),
        vcpus=vcpus,
        memory=memory_mb,
        comments=f"Guest: {guest_os}" if guest_os else "",
        tags=["vmware"],
    )
    if platform_name:
        vm_kwargs["platform"] = Platform(name=platform_name)
    if primary_ip4:
        vm_kwargs["primary_ip4"] = IPAddress(address=primary_ip4)
    entities.append(Entity(virtual_machine=VirtualMachine(**vm_kwargs)))

    vm_ref = _vm_ref(vm_name, cluster_name, site_name)
    total_disk_gb = 0
    for device in devices:
        # VM NICs
        if isinstance(device, vim.vm.device.VirtualEthernetCard):
            nic_name = nic_by_key[device.key]
            mac = getattr(device, "macAddress", "") or ""

            # The attached network name goes into the description.
            backing = getattr(device, "backing", None)
            network = getattr(backing, "network", None)
            if network:
                net_name = network.name
            else:
                net_name = getattr(backing, "deviceName", "") or ""

            entities.append(Entity(vm_interface=VMInterface(
                virtual_machine=vm_ref,
                name=nic_name[:64],
                enabled=device.connectable.connected if device.connectable else True,
                mac_address=mac,
                description=net_name[:200] if net_name else "",
                tags=["vmware"],
            )))

        # Virtual Disks
        elif isinstance(device, vim.vm.device.VirtualDisk):
            disk_name = _device_label(device, "disk")
            # KB -> whole GB; disks smaller than 1 GB round down to 0 and are skipped.
            disk_size_gb = (
                device.capacityInKB // (1024 * 1024) if device.capacityInKB else 0
            )
            total_disk_gb += disk_size_gb
            if disk_size_gb > 0:
                entities.append(Entity(virtual_disk=VirtualDisk(
                    virtual_machine=vm_ref,
                    name=disk_name[:64],
                    size=disk_size_gb,
                    tags=["vmware"],
                )))

    # IP entities from guest tools
    for ip_str, nic_name in vm_ips:
        entities.append(Entity(ip_address=IPAddress(
            address=ip_str,
            status="active",
            assigned_object_vm_interface=VMInterface(
                virtual_machine=vm_ref,
                name=nic_name[:64],
            ),
            tags=["vmware"],
        )))

    log.info(" VM: %s (%s, %d vCPU, %d MB RAM, %d GB disk)",
             vm_name, status, vcpus, memory_mb, total_disk_gb)
    return entities


def build_vm_entities(si, site_name: str, host_cluster_map: dict) -> list[Entity]:
    """Build VirtualMachine + VMInterface + VirtualDisk + IPAddress entities.

    Args:
        si: Connected ServiceInstance.
        site_name: Site name attached to every VM and cluster reference.
        host_cluster_map: Host name -> cluster name, as returned by
            ``build_host_entities``.

    A VM that cannot be processed is logged and skipped as a whole; the
    remaining VMs are still collected.
    """
    entities = []
    for vm_obj in get_all_objects(si, vim.VirtualMachine):
        # Resolve the name inside the try block and remember it, so the error
        # handler never has to touch the (possibly vanished) object again.
        vm_label = "?"
        try:
            vm_label = vm_obj.name
            entities.extend(
                _entities_for_vm(vm_obj, vm_label, site_name, host_cluster_map)
            )
        except Exception as exc:
            # Per-item boundary: keep going, attach the traceback only in DEBUG.
            log.error(" Failed to process VM %s: %s", vm_label, exc,
                      exc_info=log.isEnabledFor(logging.DEBUG))
    return entities


def _mask_to_prefix(mask: str) -> int:
    """Convert subnet mask to prefix length.

    Returns 24 when *mask* is not a valid dotted-quad IPv4 netmask, including
    masks whose set bits are not contiguous (e.g. ``255.0.255.0``).
    """
    import ipaddress

    try:
        bits = int(ipaddress.IPv4Address(mask))
    except (ValueError, TypeError):
        return 24
    host_bits = bits ^ 0xFFFFFFFF
    # A valid netmask inverts to 2**k - 1, so adding one must clear every bit.
    if host_bits & (host_bits + 1):
        return 24
    return 32 - host_bits.bit_length()


# ---------------------------------------------------------------------------
# Orchestration
# ---------------------------------------------------------------------------

def collect_all_entities(cfg: dict) -> list[Entity]:
    """Connect to vSphere and collect cluster, host and VM entities, in that order."""
    si = connect_vsphere(cfg)
    site_name = cfg["site"]

    entities: list[Entity] = []

    # Clusters
    entities.extend(build_cluster_entities(si, site_name))

    # ESXi hosts + interfaces
    host_entities, host_cluster_map = build_host_entities(si, site_name)
    entities.extend(host_entities)

    # VMs
    entities.extend(build_vm_entities(si, site_name, host_cluster_map))

    return entities


def ingest_entities(entities: list[Entity], dry_run: bool = False) -> None:
    """Send *entities* to Diode in chunks, or only log them when *dry_run* is set.

    Connection settings come from ``DIODE_TARGET``, ``DIODE_CLIENT_ID`` and
    ``DIODE_CLIENT_SECRET`` (with ``INGESTER_CLIENT_ID`` /
    ``INGESTER_CLIENT_SECRET`` as fallbacks). Exits the process with status 1
    when no client secret is available for a real run. Per-chunk errors
    reported by Diode are logged and the remaining chunks are still sent.
    """
    if not entities:
        log.warning("No entities to ingest")
        return

    if dry_run:
        log.info("DRY RUN: %d entities would be ingested", len(entities))
        for i, e in enumerate(entities):
            log.info(" [%d] %s", i, e)
        return

    target = os.environ.get("DIODE_TARGET", "grpc://localhost:8080/diode")
    client_id = os.environ.get(
        "DIODE_CLIENT_ID",
        os.environ.get("INGESTER_CLIENT_ID", "diode-ingester"),
    )
    client_secret = os.environ.get(
        "DIODE_CLIENT_SECRET",
        os.environ.get("INGESTER_CLIENT_SECRET", ""),
    )

    if not client_secret:
        log.error("DIODE_CLIENT_SECRET not set — cannot ingest")
        sys.exit(1)

    log.info("Ingesting %d entities to %s ...", len(entities), target)

    from netboxlabs.diode.sdk.ingester import create_message_chunks

    with DiodeClient(
        target=target,
        client_id=client_id,
        client_secret=client_secret,
        app_name="vmware-collector",
        app_version="0.1.0",
    ) as client:
        chunks = create_message_chunks(entities)
        for idx, chunk in enumerate(chunks):
            resp = client.ingest(entities=chunk)
            if resp.errors:
                log.error("Chunk %d errors: %s", idx, resp.errors)
            else:
                log.info("Chunk %d: %d entities ingested", idx, len(chunk))


def main():
    """Command-line entry point: parse arguments, collect, then ingest."""
    parser = argparse.ArgumentParser(description="VMware vSphere collector for NetBox")
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--log-level", default="INFO",
                        choices=["DEBUG", "INFO", "WARNING", "ERROR"])
    parser.add_argument("--env-file", default=".env")
    args = parser.parse_args()

    logging.basicConfig(
        level=getattr(logging, args.log_level),
        format="%(asctime)s %(name)s %(levelname)s %(message)s",
    )

    # The .env file only fills in variables the environment does not already set.
    load_dotenv(args.env_file)
    cfg = get_config()

    entities = collect_all_entities(cfg)
    log.info("Total entities: %d", len(entities))

    ingest_entities(entities, dry_run=args.dry_run)
    log.info("Done!")


if __name__ == "__main__":
    main()