From c285810c680fe2c319bd8cefb4bca0a88c6c1597 Mon Sep 17 00:00:00 2001 From: sam Date: Mon, 9 Mar 2026 10:58:34 -0700 Subject: [PATCH] Two-phase focused neighbor walk and fix status/optics bugs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Restructure neighbor walk into Phase 1 (discovery: ifDescr + ifName + ifStackTable) and Phase 2 (targeted snmpget for matched interfaces only). Reduces NCS 5500 walk from ~150k OIDs to ~20k discovery + ~600 targeted. - Rename cisco-parse.py to cisco_parse.py for Python import compatibility. - Add parse_walk_text() for in-process parsing without file I/O. - Fix interface status showing DOWN/ADMIN DOWN: use isUp() instead of hardcoded === '1' checks, add -Oe flag to snmpget for numeric enums. - Fix optics showing raw sensor values: apply entSensorPrecision scaling (e.g., -95122 with precision 4 → -9.5122 dBm). Co-Authored-By: Claude Opus 4.6 --- build_nid_viewer.py | 10 +- cisco-parse.py => cisco_parse.py | 154 +++++++++++-------- nid-server.py | 249 +++++++++++++++++++++++++------ 3 files changed, 303 insertions(+), 110 deletions(-) rename cisco-parse.py => cisco_parse.py (89%) diff --git a/build_nid_viewer.py b/build_nid_viewer.py index c83d656..12039e4 100644 --- a/build_nid_viewer.py +++ b/build_nid_viewer.py @@ -2056,8 +2056,8 @@ function renderNeighbor() {{ const optics = ndata.optics || {{}}; const shortName = (sys.sysName || ip).split('.')[0]; - const adminUp = qi.ifAdminStatus === '1'; - const operUp = qi.ifOperStatus === '1'; + const adminUp = isUp(qi.ifAdminStatus); + const operUp = isUp(qi.ifOperStatus); const statusClass = !adminUp ? 'admin-down' : operUp ? 'up' : 'down'; const statusText = !adminUp ? 'Admin Down' : operUp ? 'Up' : 'Down'; @@ -2110,14 +2110,14 @@ function renderNeighbor() {{ let subRows = ''; for (const sk of subKeys) {{ const s = subs[sk]; - const sUp = s.ifOperStatus === '1'; + const sUp = isUp(s.ifOperStatus); subRows += ` ${{esc(s.ifDescr || s.ifName || '?')}} ${{s.vlanId || '?'}} ${{sUp ? 'Up' : 'Down'}} ${{esc(s.ifAlias || '')}} ${{s.bvi_ifDescr ? esc(s.bvi_ifDescr) : '—'}} - ${{s.bvi_ifDescr ? `${{s.bvi_ifOperStatus === '1' ? 'Up' : 'Down'}}` : ''}} + ${{s.bvi_ifDescr ? `${{isUp(s.bvi_ifOperStatus) ? 'Up' : 'Down'}}` : ''}} `; }} subsHtml = ` @@ -2134,7 +2134,7 @@ function renderNeighbor() {{ let vlanRows = ''; for (const vk of vlanKeys.sort((a,b) => parseInt(a) - parseInt(b))) {{ const v = vlans[vk]; - const vUp = v.ifOperStatus === '1'; + const vUp = isUp(v.ifOperStatus); vlanRows += ` Vlan${{vk}} ${{vUp ? 'Up' : 'Down'}} diff --git a/cisco-parse.py b/cisco_parse.py similarity index 89% rename from cisco-parse.py rename to cisco_parse.py index 1d8a8b8..757648d 100644 --- a/cisco-parse.py +++ b/cisco_parse.py @@ -103,73 +103,75 @@ OID_CISCO_SENSOR_PREFIX = ".1.3.6.1.4.1.9.9.91.1.1.1.1" # Walk file parser # ──────────────────────────────────────────────────────────────────────── -def parse_walk_file(walk_file): - """Parse an snmpbulkwalk -On -OQ output file into {oid: value} dict. - - Lines look like: - .1.3.6.1.2.1.2.2.1.2.62 = "TenGigabitEthernet1/1/3" - .1.3.6.1.2.1.2.2.1.7.62 = 1 +def _parse_lines(lines): + """Parse snmpbulkwalk/snmpget -On -OQ output lines into {oid: value} dict. + Handles multi-line quoted values (e.g., Cisco sysDescr). String values have surrounding quotes stripped. """ - walk_path = Path(walk_file) oid_data = {} + pending_oid = None + pending_val = None - with walk_path.open("r", errors="replace") as fh: - pending_oid = None - pending_val = None + for raw_line in lines: + line = raw_line.rstrip("\n\r") - for raw_line in fh: - line = raw_line.rstrip("\n\r") - - # Handle multi-line values (e.g., Cisco sysDescr spans multiple lines) - if pending_oid is not None: - # Continuation of a multi-line quoted value - pending_val += " " + line.strip() - if '"' in line: - # Closing quote found — finalize - val = pending_val.strip() - if val.startswith('"'): - val = val[1:] - if val.endswith('"'): - val = val[:-1] - oid_data[pending_oid] = val - pending_oid = None - pending_val = None - continue - - line = line.strip() - if not line or line.startswith("#"): - continue - - # Split on first ' = ' - parts = line.split(" = ", 1) - if len(parts) != 2: - continue - - oid = parts[0].strip() - value = parts[1].strip() - - # Check for opening quote without closing (multi-line value) - if value.startswith('"') and not value.endswith('"'): - pending_oid = oid - pending_val = value - continue - - # Strip surrounding quotes - if len(value) >= 2 and value[0] == '"' and value[-1] == '"': - value = value[1:-1] - - oid_data[oid] = value - - # Handle any trailing pending value if pending_oid is not None: - val = pending_val.strip().strip('"') - oid_data[pending_oid] = val + pending_val += " " + line.strip() + if '"' in line: + val = pending_val.strip() + if val.startswith('"'): + val = val[1:] + if val.endswith('"'): + val = val[:-1] + oid_data[pending_oid] = val + pending_oid = None + pending_val = None + continue + + line = line.strip() + if not line or line.startswith("#"): + continue + + parts = line.split(" = ", 1) + if len(parts) != 2: + continue + + oid = parts[0].strip() + value = parts[1].strip() + + if value.startswith('"') and not value.endswith('"'): + pending_oid = oid + pending_val = value + continue + + if len(value) >= 2 and value[0] == '"' and value[-1] == '"': + value = value[1:-1] + + oid_data[oid] = value + + if pending_oid is not None: + val = pending_val.strip().strip('"') + oid_data[pending_oid] = val return oid_data +def parse_walk_file(walk_file): + """Parse an snmpbulkwalk -On -OQ output file into {oid: value} dict.""" + walk_path = Path(walk_file) + with walk_path.open("r", errors="replace") as fh: + return _parse_lines(fh) + + +def parse_walk_text(text): + """Parse snmpbulkwalk/snmpget -On -OQ output from a string. + + Useful for in-process parsing without writing to a file first. + """ + return _parse_lines(text.splitlines()) + + # ──────────────────────────────────────────────────────────────────────── # Helpers # ──────────────────────────────────────────────────────────────────────── @@ -539,6 +541,14 @@ def get_optics_info(oid_data, parent_ifindex, parent_descr): # entSensorType: .1.3.6.1.4.1.9.9.91.1.1.1.1.1.{idx} sensor_type_prefix = ".1.3.6.1.4.1.9.9.91.1.1.1.1.1." + # entSensorPrecision: .1.3.6.1.4.1.9.9.91.1.1.1.1.3.{idx} + # Number of decimal places to apply to entSensorValue + sensor_precision_prefix = ".1.3.6.1.4.1.9.9.91.1.1.1.1.3." + + # entSensorScale: .1.3.6.1.4.1.9.9.91.1.1.1.1.2.{idx} + # Scale factor (1=yocto..9=units..17=exa) — 9 means no scaling + sensor_scale_prefix = ".1.3.6.1.4.1.9.9.91.1.1.1.1.2." + # Find entity indices that match the interface short_name = _shorten_name(parent_descr) matching_ent_indices = [] @@ -556,6 +566,25 @@ def get_optics_info(oid_data, parent_ifindex, parent_descr): _dbg(f"Found {len(matching_ent_indices)} entity entries for {parent_descr}") + def _scale_sensor_value(raw_val, ent_idx): + """Apply entSensorPrecision to scale a raw sensor value.""" + try: + val = float(raw_val) + except (ValueError, TypeError): + return raw_val + + precision = _get(oid_data, f"{sensor_precision_prefix}{ent_idx}") + try: + prec = int(precision) + except (ValueError, TypeError): + prec = 0 + + if prec > 0: + val = val / (10 ** prec) + + # Round to avoid floating point noise + return str(round(val, prec if prec > 0 else 1)) + # For matching entities, look up sensor readings for ent_idx in matching_ent_indices: sensor_val = _get(oid_data, f"{sensor_value_prefix}{ent_idx}") @@ -566,20 +595,23 @@ def get_optics_info(oid_data, parent_ifindex, parent_descr): sensor_type = _get(oid_data, f"{sensor_type_prefix}{ent_idx}") ent_descr = _get(oid_data, f"{ent_descr_prefix}{ent_idx}").lower() + # Scale the raw value using entSensorPrecision + scaled = _scale_sensor_value(sensor_val, ent_idx) + # entSensorType: 8 = celsius, 14 = dBm # Also check description text for classification if sensor_type == "8" or "temperature" in ent_descr or "temp" in ent_descr: - result["temperature"] = sensor_val + result["temperature"] = scaled elif "transmit" in ent_descr or "tx" in ent_descr: - result["txPower"] = sensor_val + result["txPower"] = scaled elif "receive" in ent_descr or "rx" in ent_descr: - result["rxPower"] = sensor_val + result["rxPower"] = scaled elif sensor_type == "14": # dBm but unclassified — assign to first empty power slot if result["txPower"] is None: - result["txPower"] = sensor_val + result["txPower"] = scaled elif result["rxPower"] is None: - result["rxPower"] = sensor_val + result["rxPower"] = scaled return result diff --git a/nid-server.py b/nid-server.py index 70090af..3f5da95 100644 --- a/nid-server.py +++ b/nid-server.py @@ -30,14 +30,40 @@ from pathlib import Path SCRIPT_DIR = Path(__file__).resolve().parent WALKS_DIR = SCRIPT_DIR / "walks" -# ── OID subtrees for neighbor (Cisco) device walk ──────────────────── -NEIGHBOR_TARGETED_OIDS = [ - (".1.3.6.1.2.1.1", "System"), - (".1.3.6.1.2.1.2.2.1", "ifTable"), - (".1.3.6.1.2.1.31.1.1.1", "ifXTable"), - (".1.3.6.1.2.1.31.1.2", "ifStackTable"), - (".1.3.6.1.2.1.17.7.1.4.3.1", "dot1qVlanStatic"), - (".1.3.6.1.4.1.9.9.46.1.3.1", "vtpVlanTable"), +# ── Phase 1: Discovery OIDs (lightweight, column-specific walks) ────── +# On an NCS 5500 with 10k+ interfaces, walking full ifTable/ifXTable would +# return ~150k OIDs. Instead, walk only the columns needed to identify the +# target interface and its children. ~2 OIDs per interface for discovery. +NEIGHBOR_DISCOVERY_OIDS = [ + (".1.3.6.1.2.1.1", "System"), # ~8 OIDs + (".1.3.6.1.2.1.2.2.1.2", "ifDescr"), # 1 column: ifDescr + (".1.3.6.1.2.1.31.1.1.1.1", "ifName"), # 1 column: ifName + (".1.3.6.1.2.1.31.1.2", "ifStackTable"), # parent-child relationships +] + +# ── Phase 2: Per-interface OID suffixes for targeted snmpget ───────── +# After matching the target ifIndex + children, we GET only these OIDs +# for each relevant interface. ~15 OIDs per interface instead of ~150k total. +NEIGHBOR_INTERFACE_OID_BASES = [ + ".1.3.6.1.2.1.2.2.1.3", # ifType + ".1.3.6.1.2.1.2.2.1.4", # ifMtu + ".1.3.6.1.2.1.2.2.1.5", # ifSpeed + ".1.3.6.1.2.1.2.2.1.7", # ifAdminStatus + ".1.3.6.1.2.1.2.2.1.8", # ifOperStatus + ".1.3.6.1.2.1.2.2.1.10", # ifInOctets + ".1.3.6.1.2.1.2.2.1.13", # ifInDiscards + ".1.3.6.1.2.1.2.2.1.14", # ifInErrors + ".1.3.6.1.2.1.2.2.1.16", # ifOutOctets + ".1.3.6.1.2.1.2.2.1.19", # ifOutDiscards + ".1.3.6.1.2.1.2.2.1.20", # ifOutErrors + ".1.3.6.1.2.1.31.1.1.1.6", # ifHCInOctets + ".1.3.6.1.2.1.31.1.1.1.10", # ifHCOutOctets + ".1.3.6.1.2.1.31.1.1.1.15", # ifHighSpeed + ".1.3.6.1.2.1.31.1.1.1.18", # ifAlias +] + +# ── Extra subtrees to walk for IOS-XE VLAN/optics (Phase 2 optional) ─ +NEIGHBOR_EXTRA_OIDS = [ (".1.3.6.1.2.1.47.1.1.1", "entPhysicalTable"), (".1.3.6.1.4.1.9.9.91.1.1.1", "ciscoEntitySensor"), ] @@ -263,8 +289,80 @@ def build_neighbor_snmp_auth() -> list: return ["-v", NEIGHBOR_SNMP_VERSION, "-c", NEIGHBOR_SNMP_COMMUNITY] +def _walk_subtrees_parallel(walk_cmd, auth, target, oid_list, status_prefix=""): + """Walk a list of (oid, label) subtrees in parallel. Returns combined text.""" + total = len(oid_list) + completed = [0] + results_map = {} + + def walk_one(idx, oid, label): + try: + res = subprocess.run( + [walk_cmd, "-On", "-OQ"] + auth + [target, oid], + capture_output=True, text=True, timeout=60, + ) + completed[0] += 1 + with neighbor_lock: + neighbor_status[target] = { + "state": "walking", + "message": f"{status_prefix}({completed[0]}/{total})", + } + return idx, res.stdout + except subprocess.TimeoutExpired: + completed[0] += 1 + return idx, "" + + with ThreadPoolExecutor(max_workers=4) as pool: + futures = [ + pool.submit(walk_one, i, oid, label) + for i, (oid, label) in enumerate(oid_list) + ] + for fut in as_completed(futures): + idx, output = fut.result() + if output.strip(): + results_map[idx] = output + + return "\n".join(results_map[i] for i in sorted(results_map)) + + +def _snmpget_batch(walk_cmd, auth, target, oid_list): + """Run snmpget for a batch of specific OIDs. Returns raw output text. + + Uses snmpget (not bulkwalk) since we're requesting exact OIDs. + Falls back to individual gets if batch fails. + """ + if not oid_list: + return "" + + # snmpget can handle multiple OIDs in one call (much faster than individual) + # Split into chunks of 30 to avoid command-line length limits + all_output = [] + for i in range(0, len(oid_list), 30): + chunk = oid_list[i:i + 30] + try: + res = subprocess.run( + ["snmpget", "-On", "-OQ", "-Oe"] + auth + [target] + chunk, + capture_output=True, text=True, timeout=30, + ) + if res.stdout.strip(): + all_output.append(res.stdout) + except subprocess.TimeoutExpired: + pass + + return "\n".join(all_output) + + def run_neighbor_walk(target: str, rem_port_id: str, rem_sys_name: str = ""): - """Execute a targeted SNMP walk against an LLDP neighbor device.""" + """Execute a two-phase focused SNMP walk against an LLDP neighbor device. + + Phase 1 (Discovery): Walk System + ifDescr + ifName + ifStackTable + → Identify target ifIndex and child subinterfaces + Phase 2 (Targeted): snmpget ~15 OIDs per matched interface only + → Full interface facts for only the relevant interfaces + + On an NCS 5500 with 10k interfaces, this reduces from ~150k OIDs + to ~20k discovery + ~600 targeted GETs. + """ ip_re = re.compile(r"^\d{1,3}(\.\d{1,3}){3}$") if not ip_re.match(target): with neighbor_lock: @@ -272,7 +370,7 @@ def run_neighbor_walk(target: str, rem_port_id: str, rem_sys_name: str = ""): return with neighbor_lock: - neighbor_status[target] = {"state": "walking", "message": "Starting neighbor walk..."} + neighbor_status[target] = {"state": "walking", "message": "Phase 1: Discovering interfaces..."} timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") safe_ip = target.replace(".", "-") @@ -284,40 +382,103 @@ def run_neighbor_walk(target: str, rem_port_id: str, rem_sys_name: str = ""): t_start = time.time() try: - # Walk neighbor subtrees in parallel - total = len(NEIGHBOR_TARGETED_OIDS) - completed = [0] - results_map = {} + # ── Phase 1: Discovery walk ────────────────────────────────── + discovery_output = _walk_subtrees_parallel( + walk_cmd, auth, target, NEIGHBOR_DISCOVERY_OIDS, + status_prefix="Phase 1: Discovery " + ) - def walk_subtree(idx, oid, label): - try: - res = subprocess.run( - [walk_cmd, "-On", "-OQ"] + auth + [target, oid], - capture_output=True, text=True, timeout=60, - ) - completed[0] += 1 - with neighbor_lock: - neighbor_status[target] = { - "state": "walking", - "message": f"Walking subtrees ({completed[0]}/{total})", - } - return idx, res.stdout - except subprocess.TimeoutExpired: - completed[0] += 1 - return idx, "" + if not discovery_output.strip(): + with neighbor_lock: + neighbor_status[target] = { + "state": "error", + "message": "Discovery walk returned no data — check credentials", + } + return - with ThreadPoolExecutor(max_workers=4) as pool: - futures = [ - pool.submit(walk_subtree, i, oid, label) - for i, (oid, label) in enumerate(NEIGHBOR_TARGETED_OIDS) - ] - for fut in as_completed(futures): - idx, output = fut.result() - if output.strip(): - results_map[idx] = output + # Parse discovery data in-process to find target interface + sys.path.insert(0, str(SCRIPT_DIR)) + from cisco_parse import parse_walk_text, build_interface_index, match_rem_port_id, \ + discover_subinterfaces_stack, discover_subinterfaces_pattern - output_lines = [results_map[i] for i in sorted(results_map)] - walk_file.write_text("\n".join(output_lines)) + discovery_oids = parse_walk_text(discovery_output) + interfaces = build_interface_index(discovery_oids) + discovery_count = len(discovery_oids) + + with neighbor_lock: + neighbor_status[target] = { + "state": "walking", + "message": f"Phase 1 done: {discovery_count:,} OIDs, {len(interfaces)} interfaces. Matching...", + } + + matched_ifindex = match_rem_port_id(interfaces, rem_port_id) + + if matched_ifindex is None: + # Write what we have and let cisco-parse produce a best-effort result + walk_file.write_text(discovery_output) + else: + # Find child interfaces (subinterfaces) + parent_descr = interfaces.get(matched_ifindex, {}).get("ifDescr", "") + child_indices = set() + + # ifStackTable children + child_indices |= discover_subinterfaces_stack(discovery_oids, matched_ifindex) + + # Pattern-based children (ifDescr matching) + if parent_descr: + pattern_children = discover_subinterfaces_pattern(discovery_oids, parent_descr) + child_indices |= set(pattern_children.keys()) + + # Also look for BDI/BVI interfaces that correlate with subinterfaces + bvi_indices = set() + for child_idx in child_indices: + child_descr = interfaces.get(child_idx, {}).get("ifDescr", "") + vlan_match = re.search(r"\.(\d+)$", child_descr) + if vlan_match: + vlan_id = vlan_match.group(1) + # Search ifDescr for BDI{N} or BVI{N} + for ifidx, info in interfaces.items(): + d = info.get("ifDescr", "") + if d == f"BDI{vlan_id}" or d == f"BVI{vlan_id}": + bvi_indices.add(ifidx) + + # Also find Vlan{N} SVIs (IOS-XE) + vlan_indices = set() + for ifidx, info in interfaces.items(): + d = info.get("ifDescr", "") + if re.match(r"^Vlan\d+$", d): + vlan_indices.add(ifidx) + + all_target_indices = {matched_ifindex} | child_indices | bvi_indices | vlan_indices + + with neighbor_lock: + neighbor_status[target] = { + "state": "walking", + "message": f"Phase 2: Getting details for {len(all_target_indices)} interfaces...", + } + + # ── Phase 2: Targeted snmpget ──────────────────────────── + target_oids = [] + for ifidx in all_target_indices: + for base_oid in NEIGHBOR_INTERFACE_OID_BASES: + target_oids.append(f"{base_oid}.{ifidx}") + + phase2_output = _snmpget_batch(walk_cmd, auth, target, target_oids) + + # Also walk optics/entity subtrees (small on most devices) + extra_output = _walk_subtrees_parallel( + walk_cmd, auth, target, NEIGHBOR_EXTRA_OIDS, + status_prefix="Phase 2: Optics/Entity " + ) + + # Combine all phases into one walk file + combined = discovery_output + if phase2_output.strip(): + combined += "\n" + phase2_output + if extra_output.strip(): + combined += "\n" + extra_output + + walk_file.write_text(combined) line_count = sum(1 for _ in walk_file.open()) elapsed = round(time.time() - t_start, 1) @@ -330,12 +491,12 @@ def run_neighbor_walk(target: str, rem_port_id: str, rem_sys_name: str = ""): } return - # Parse with cisco-parse.py + # ── Parse combined data with cisco-parse.py ────────────────── with neighbor_lock: neighbor_status[target] = {"state": "parsing", "message": "Parsing neighbor data..."} parse_result = subprocess.run( - [sys.executable, str(SCRIPT_DIR / "cisco-parse.py"), + [sys.executable, str(SCRIPT_DIR / "cisco_parse.py"), str(walk_file), rem_port_id], capture_output=True, text=True, timeout=60, ) @@ -361,7 +522,7 @@ def run_neighbor_walk(target: str, rem_port_id: str, rem_sys_name: str = ""): latest_neighbor[target] = neighbor_json neighbor_status[target] = { "state": "complete", - "message": f"Done — {line_count:,} lines in {elapsed}s", + "message": f"Done — {line_count:,} OIDs ({discovery_count:,} discovery + {line_count - discovery_count:,} targeted) in {elapsed}s", "json_path": str(neighbor_json), }