xref: /trueos/contrib/ofed/management/opensm/opensm/osm_ucast_cache.c (revision 8fe640108653f13042f1b15213769e338aa524f6)
1 /*
2  * Copyright (c) 2008      Mellanox Technologies LTD. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  *
32  */
33 
34 /*
35  * Abstract:
36  *    Implementation of OpenSM Cached Unicast Routing
37  *
38  * Environment:
39  *    Linux User Mode
40  *
41  */
42 
43 #if HAVE_CONFIG_H
44 #  include <config.h>
45 #endif
46 
47 #include <stdlib.h>
48 #include <string.h>
49 #include <ctype.h>
50 #include <errno.h>
51 #include <iba/ib_types.h>
52 #include <complib/cl_qmap.h>
53 #include <complib/cl_pool.h>
54 #include <complib/cl_debug.h>
55 #include <opensm/osm_opensm.h>
56 #include <opensm/osm_ucast_mgr.h>
57 #include <opensm/osm_ucast_cache.h>
58 #include <opensm/osm_switch.h>
59 #include <opensm/osm_node.h>
60 #include <opensm/osm_port.h>
61 
62 #define CACHE_SW_PORTS 36
63 
64 typedef struct cache_port {
65 	boolean_t is_leaf;
66 	uint16_t remote_lid_ho;
67 } cache_port_t;
68 
69 typedef struct cache_switch {
70 	cl_map_item_t map_item;
71 	boolean_t dropped;
72 	uint16_t max_lid_ho;
73 	uint16_t num_hops;
74 	uint8_t **hops;
75 	uint8_t *lft;
76 	uint8_t num_ports;
77 	cache_port_t ports[0];
78 } cache_switch_t;
79 
80 /**********************************************************************
81  **********************************************************************/
82 
/*
 * Return the base lid (host order) of a cached switch.
 * The switch's own details are kept in entry 0 of its port array.
 */
static uint16_t __cache_sw_get_base_lid_ho(cache_switch_t * p_sw)
{
	const cache_port_t *p_self = &p_sw->ports[0];

	return p_self->remote_lid_ho;
}
87 
88 /**********************************************************************
89  **********************************************************************/
90 
__cache_sw_is_leaf(cache_switch_t * p_sw)91 static boolean_t __cache_sw_is_leaf(cache_switch_t * p_sw)
92 {
93 	return p_sw->ports[0].is_leaf;
94 }
95 
96 /**********************************************************************
97  **********************************************************************/
98 
/*
 * Mark a cached switch as a leaf switch.
 * The flag lives in entry 0 of the switch's port array.
 */
static void __cache_sw_set_leaf(cache_switch_t * p_sw)
{
	cache_port_t *p_self = &p_sw->ports[0];

	p_self->is_leaf = TRUE;
}
103 
104 /**********************************************************************
105  **********************************************************************/
106 
__cache_sw_new(uint16_t lid_ho,unsigned num_ports)107 static cache_switch_t *__cache_sw_new(uint16_t lid_ho, unsigned num_ports)
108 {
109 	cache_switch_t *p_cache_sw = malloc(sizeof(cache_switch_t) +
110 					    num_ports * sizeof(cache_port_t));
111 	if (!p_cache_sw)
112 		return NULL;
113 
114 	memset(p_cache_sw, 0,
115 	       sizeof(*p_cache_sw) + num_ports * sizeof(cache_port_t));
116 
117 	p_cache_sw->num_ports = num_ports;
118 
119 	/* port[0] fields represent this switch details - lid and type */
120 	p_cache_sw->ports[0].remote_lid_ho = lid_ho;
121 	p_cache_sw->ports[0].is_leaf = FALSE;
122 
123 	return p_cache_sw;
124 }
125 
126 /**********************************************************************
127  **********************************************************************/
128 
/*
 * Release a cached switch together with the routing info it still owns.
 * A NULL p_sw is accepted and ignored.
 *
 * The object must already be (or be about to be) unlinked from
 * cache_sw_tbl by the caller; this function does not touch the map.
 */
static void __cache_sw_destroy(cache_switch_t * p_sw)
{
	unsigned i;

	if (!p_sw)
		return;

	free(p_sw->lft);

	/*
	 * hops is an array of num_hops row pointers; freeing only the outer
	 * array would leak every row.  __cache_restore_ucast_info() resets
	 * both hops and num_hops when it hands the table over, so nothing
	 * that has changed owner is freed here.
	 */
	if (p_sw->hops) {
		for (i = 0; i < p_sw->num_hops; i++)
			free(p_sw->hops[i]);
		free(p_sw->hops);
	}

	free(p_sw);
}
140 
141 /**********************************************************************
142  **********************************************************************/
143 
__cache_get_sw(osm_ucast_mgr_t * p_mgr,uint16_t lid_ho)144 static cache_switch_t *__cache_get_sw(osm_ucast_mgr_t * p_mgr, uint16_t lid_ho)
145 {
146 	cache_switch_t *p_cache_sw = (cache_switch_t *)
147 	    cl_qmap_get(&p_mgr->cache_sw_tbl, lid_ho);
148 	if (p_cache_sw == (cache_switch_t *)
149 	    cl_qmap_end(&p_mgr->cache_sw_tbl))
150 		p_cache_sw = NULL;
151 
152 	return p_cache_sw;
153 }
154 
155 /**********************************************************************
156  **********************************************************************/
/*
 * Record a lost link of a switch port in the cache.
 *
 * p_mgr         - unicast manager owning the cache
 * p             - physical port of the switch (local side of the link);
 *                 p->p_node->sw must be valid
 * remote_lid_ho - lid (host order) of the remote side of the link
 * is_ca         - TRUE when the remote node is a CA/RTR
 *
 * Nothing is cached when the cache is already invalid, when either lid
 * is 0, or when p is port 0.  On allocation failure or on a port number
 * that does not fit the cached switch, the whole cache is invalidated.
 */
static void __cache_add_sw_link(osm_ucast_mgr_t * p_mgr, osm_physp_t *p,
				uint16_t remote_lid_ho, boolean_t is_ca)
{
	cache_switch_t *p_cache_sw;
	uint16_t lid_ho = cl_ntoh16(osm_node_get_base_lid(p->p_node, 0));

	OSM_LOG_ENTER(p_mgr->p_log);

	/*
	 * Same guard as in __cache_remove_port(): when both sides of a
	 * switch-2-switch link are cached back to back and the first call
	 * invalidates the cache, the second one must not insert a fresh
	 * entry into the table that has just been flushed.
	 */
	if (!p_mgr->cache_valid)
		goto Exit;

	if (!lid_ho || !remote_lid_ho || !p->port_num)
		goto Exit;

	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
		"Caching switch port: lid %u [port %u] -> lid %u (%s)\n",
		lid_ho, p->port_num, remote_lid_ho, (is_ca) ? "CA/RTR" : "SW");

	p_cache_sw = __cache_get_sw(p_mgr, lid_ho);
	if (!p_cache_sw) {
		/* first cached link of this switch - create its entry */
		p_cache_sw = __cache_sw_new(lid_ho, p->p_node->sw->num_ports);
		if (!p_cache_sw) {
			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
				"ERR AD01: Out of memory - cache is invalid\n");
			osm_ucast_cache_invalidate(p_mgr);
			goto Exit;
		}
		cl_qmap_insert(&p_mgr->cache_sw_tbl, lid_ho,
			       &p_cache_sw->map_item);
	}

	if (p->port_num >= p_cache_sw->num_ports) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
			"ERR AD02: Wrong switch? - cache is invalid\n");
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	/* a switch that has (had) a CA/RTR attached is a leaf switch */
	if (is_ca)
		__cache_sw_set_leaf(p_cache_sw);

	if (p_cache_sw->ports[p->port_num].remote_lid_ho == 0) {
		/* cache this link only if it hasn't been already cached */
		p_cache_sw->ports[p->port_num].remote_lid_ho = remote_lid_ho;
		p_cache_sw->ports[p->port_num].is_leaf = is_ca;
	}
Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}
203 
204 /**********************************************************************
205  **********************************************************************/
206 
/*
 * Drop from the cache every switch that has no cached link left on any
 * of its ports 1..num_ports-1.  Does nothing when the cache is invalid.
 */
static void __cache_cleanup_switches(osm_ucast_mgr_t * p_mgr)
{
	cache_switch_t *p_cur;
	cache_switch_t *p_following;
	unsigned idx;

	if (!p_mgr->cache_valid)
		return;

	for (p_cur = (cache_switch_t *) cl_qmap_head(&p_mgr->cache_sw_tbl);
	     p_cur != (cache_switch_t *) cl_qmap_end(&p_mgr->cache_sw_tbl);
	     p_cur = p_following) {
		/* pick the successor first - p_cur may be destroyed below */
		p_following =
		    (cache_switch_t *) cl_qmap_next(&p_cur->map_item);

		/* advance to the first port that still has a cached link */
		idx = 1;
		while (idx < p_cur->num_ports &&
		       !p_cur->ports[idx].remote_lid_ho)
			idx++;

		if (idx < p_cur->num_ports)
			continue;	/* something is still cached - keep */

		cl_qmap_remove_item(&p_mgr->cache_sw_tbl, &p_cur->map_item);
		__cache_sw_destroy(p_cur);
	}
}
235 
236 /**********************************************************************
237  **********************************************************************/
238 
239 static void
__cache_check_link_change(osm_ucast_mgr_t * p_mgr,osm_physp_t * p_physp_1,osm_physp_t * p_physp_2)240 __cache_check_link_change(osm_ucast_mgr_t * p_mgr,
241 			  osm_physp_t * p_physp_1, osm_physp_t * p_physp_2)
242 {
243 	OSM_LOG_ENTER(p_mgr->p_log);
244 	CL_ASSERT(p_physp_1 && p_physp_2);
245 
246 	if (!p_mgr->cache_valid)
247 		goto Exit;
248 
249 	if (!p_physp_1->p_remote_physp && !p_physp_2->p_remote_physp)
250 		/* both ports were down - new link */
251 		goto Exit;
252 
253 	/* unicast cache cannot tolerate any link location change */
254 
255 	if ((p_physp_1->p_remote_physp &&
256 	     p_physp_1->p_remote_physp->p_remote_physp) ||
257 	    (p_physp_2->p_remote_physp &&
258 	     p_physp_2->p_remote_physp->p_remote_physp)) {
259 		OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
260 			"Link location change discovered - cache is invalid\n");
261 		osm_ucast_cache_invalidate(p_mgr);
262 		goto Exit;
263 	}
264 Exit:
265 	OSM_LOG_EXIT(p_mgr->p_log);
266 }
267 
268 /**********************************************************************
269  **********************************************************************/
270 
/*
 * A link (lid_ho, port_num) -> remote_lid_ho has shown up in the fabric.
 * If exactly this link is recorded in the cache, remove it from there;
 * any mismatch (switch or port not cached, different remote lid,
 * different remote node type) invalidates the whole cache.
 *
 * is_ca is TRUE when the remote node is a CA/RTR.
 * Does nothing when the cache is already invalid.
 */
static void __cache_remove_port(osm_ucast_mgr_t * p_mgr, uint16_t lid_ho,
				uint8_t port_num, uint16_t remote_lid_ho,
				boolean_t is_ca)
{
	cache_switch_t *p_cache_sw;
	cache_port_t *p_link = NULL;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!p_mgr->cache_valid)
		goto Exit;

	p_cache_sw = __cache_get_sw(p_mgr, lid_ho);
	if (!p_cache_sw) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
			"Found uncached switch/link (lid %u, port %u) - "
			"cache is invalid\n", lid_ho, port_num);
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	/* only form the entry pointer once the port number is in range */
	if (port_num < p_cache_sw->num_ports)
		p_link = &p_cache_sw->ports[port_num];

	if (!p_link || !p_link->remote_lid_ho) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
			"Found uncached switch link (lid %u, port %u) - "
			"cache is invalid\n", lid_ho, port_num);
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	if (p_link->remote_lid_ho != remote_lid_ho) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
			"Remote lid change on switch lid %u, port %u "
			"(was %u, now %u) - cache is invalid\n",
			lid_ho, port_num, p_link->remote_lid_ho,
			remote_lid_ho);
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	/* cached and actual remote node types must agree */
	if (!p_link->is_leaf != !is_ca) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
			"Remote node type change on switch lid %u, port %u - "
			"cache is invalid\n", lid_ho, port_num);
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
		"New link from lid %u, port %u to lid %u - "
		"found in cache\n", lid_ho, port_num, remote_lid_ho);

	/* the new link was cached - clean it from the cache */
	p_link->is_leaf = FALSE;
	p_link->remote_lid_ho = 0;
Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* __cache_remove_port() */
331 
332 /**********************************************************************
333  **********************************************************************/
334 
335 static void
__cache_restore_ucast_info(osm_ucast_mgr_t * p_mgr,cache_switch_t * p_cache_sw,osm_switch_t * p_sw)336 __cache_restore_ucast_info(osm_ucast_mgr_t * p_mgr,
337 			   cache_switch_t * p_cache_sw, osm_switch_t * p_sw)
338 {
339 	if (!p_mgr->cache_valid)
340 		return;
341 
342 	/* when seting unicast info, the cached port
343 	   should have all the required info */
344 	CL_ASSERT(p_cache_sw->max_lid_ho && p_cache_sw->lft &&
345 		  p_cache_sw->num_hops && p_cache_sw->hops);
346 
347 	p_sw->max_lid_ho = p_cache_sw->max_lid_ho;
348 
349 	if (p_sw->new_lft)
350 		free(p_sw->new_lft);
351 	p_sw->new_lft = p_cache_sw->lft;
352 	p_cache_sw->lft = NULL;
353 
354 	p_sw->num_hops = p_cache_sw->num_hops;
355 	p_cache_sw->num_hops = 0;
356 	if (p_sw->hops)
357 		free(p_sw->hops);
358 	p_sw->hops = p_cache_sw->hops;
359 	p_cache_sw->hops = NULL;
360 }
361 
362 /**********************************************************************
363  **********************************************************************/
364 
/*
 * Write the cache contents (cached switches and their cached links) to
 * the log at debug level.  Skipped entirely when debug logging is off.
 */
static void __ucast_cache_dump(osm_ucast_mgr_t * p_mgr)
{
	cache_switch_t *p_entry;
	unsigned port;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Dumping missing nodes/links as logged by unicast cache:\n");

		p_entry = (cache_switch_t *) cl_qmap_head(&p_mgr->cache_sw_tbl);
		while (p_entry !=
		       (cache_switch_t *) cl_qmap_end(&p_mgr->cache_sw_tbl)) {

			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"\t Switch lid %u %s%s\n",
				__cache_sw_get_base_lid_ho(p_entry),
				(__cache_sw_is_leaf(p_entry)) ?
				"[leaf switch] " : "",
				(p_entry->dropped) ?
				"[whole switch missing]" : "");

			/* entry 0 is the switch itself - links start at 1 */
			for (port = 1; port < p_entry->num_ports; port++) {
				if (!p_entry->ports[port].remote_lid_ho)
					continue;
				OSM_LOG(p_mgr->p_log,
					OSM_LOG_DEBUG,
					"\t     - port %u -> lid %u %s\n",
					port,
					p_entry->ports[port].remote_lid_ho,
					(p_entry->ports[port].is_leaf) ?
					"[remote node is leaf]" : "");
			}

			p_entry = (cache_switch_t *)
			    cl_qmap_next(&p_entry->map_item);
		}
	}

	OSM_LOG_EXIT(p_mgr->p_log);
}
399 
400 /**********************************************************************
401  **********************************************************************/
402 
/*
 * Mark the unicast cache invalid and release every cached switch.
 * Calling it on an already invalid cache only produces the log lines.
 */
void osm_ucast_cache_invalidate(osm_ucast_mgr_t * p_mgr)
{
	cache_switch_t *p_cur;
	cache_switch_t *p_following;

	OSM_LOG_ENTER(p_mgr->p_log);
	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Invalidating unicast cache\n");

	if (p_mgr->cache_valid) {
		p_mgr->cache_valid = FALSE;

		for (p_cur = (cache_switch_t *)
		     cl_qmap_head(&p_mgr->cache_sw_tbl);
		     p_cur != (cache_switch_t *)
		     cl_qmap_end(&p_mgr->cache_sw_tbl);
		     p_cur = p_following) {
			/* read the successor before the entry is freed */
			p_following = (cache_switch_t *)
			    cl_qmap_next(&p_cur->map_item);
			__cache_sw_destroy(p_cur);
		}

		/* entries are gone - reset the map itself */
		cl_qmap_remove_all(&p_mgr->cache_sw_tbl);
	}

	OSM_LOG_EXIT(p_mgr->p_log);
}
427 
428 /**********************************************************************
429  **********************************************************************/
430 
/*
 * Decide whether the unicast cache can still be used after a sweep.
 *
 * Pass 1 walks every physical port of every switch in the subnet:
 *  - marks cached switches that have a non-switch neighbour as leaf;
 *  - for ports flagged need_update, either matches the link against the
 *    cache (and removes it from there, restoring the routing info of a
 *    switch that had been dropped), or checks that the uncached change
 *    is not caused by a new switch/CA/RTR.
 * Cached switches with nothing left cached are then removed.
 * Pass 2 walks what remains in the cache, i.e. what is still missing in
 * the fabric, and tolerates only missing CA/RTR links and missing leaf
 * switches (but not a missing leaf-2-leaf link).
 *
 * On any violation the cache is invalidated and the function returns.
 * Does nothing when the cache is already invalid.
 */
static void ucast_cache_validate(osm_ucast_mgr_t * p_mgr)
{
	cache_switch_t *p_cache_sw;
	cache_switch_t *p_remote_cache_sw;
	unsigned port_num;
	unsigned max_ports;
	uint8_t remote_node_type;
	uint16_t lid_ho;
	uint16_t remote_lid_ho;
	osm_switch_t *p_sw;
	osm_switch_t *p_remote_sw;
	osm_node_t *p_node;
	osm_physp_t *p_physp;
	osm_physp_t *p_remote_physp;
	osm_port_t *p_remote_port;
	cl_qmap_t *p_sw_tbl;

	OSM_LOG_ENTER(p_mgr->p_log);
	if (!p_mgr->cache_valid)
		goto Exit;

	/* If there are no switches in the subnet, we are done */
	p_sw_tbl = &p_mgr->p_subn->sw_guid_tbl;
	if (cl_qmap_count(p_sw_tbl) == 0) {
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	/*
	 * Scan all the physical switch ports in the subnet.
	 * If the port need_update flag is on, check whether
	 * it's just some node/port reset or a cached topology
	 * change. Otherwise the cache is invalid.
	 */
	for (p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl);
	     p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl);
	     p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item)) {

		p_node = p_sw->p_node;

		lid_ho = cl_ntoh16(osm_node_get_base_lid(p_node, 0));
		p_cache_sw = __cache_get_sw(p_mgr, lid_ho);

		max_ports = osm_node_get_num_physp(p_node);

		/* skip port 0 */
		for (port_num = 1; port_num < max_ports; port_num++) {

			p_physp = osm_node_get_physp_ptr(p_node, port_num);

			if (!p_physp || !p_physp->p_remote_physp ||
			    !osm_physp_link_exists(p_physp,
						   p_physp->p_remote_physp))
				/* no valid link */
				continue;

			/*
			 * While scanning all the physical ports in the subnet,
			 * mark corresponding leaf switches in the cache.
			 */
			if (p_cache_sw &&
			    !p_cache_sw->dropped &&
			    !__cache_sw_is_leaf(p_cache_sw) &&
			    p_physp->p_remote_physp->p_node &&
			    osm_node_get_type(p_physp->p_remote_physp->
					      p_node) != IB_NODE_TYPE_SWITCH)
				__cache_sw_set_leaf(p_cache_sw);

			if (!p_physp->need_update)
				continue;

			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"Checking switch lid %u, port %u\n",
				lid_ho, port_num);

			p_remote_physp = osm_physp_get_remote(p_physp);
			remote_node_type =
			    osm_node_get_type(p_remote_physp->p_node);

			/* a switch is identified by its port 0 base lid,
			   a CA/RTR by the lid of the connected port */
			if (remote_node_type == IB_NODE_TYPE_SWITCH)
				remote_lid_ho =
				    cl_ntoh16(osm_node_get_base_lid
					      (p_remote_physp->p_node, 0));
			else
				remote_lid_ho =
				    cl_ntoh16(osm_node_get_base_lid
					      (p_remote_physp->p_node,
					       osm_physp_get_port_num
					       (p_remote_physp)));

			if (!p_cache_sw ||
			    port_num >= p_cache_sw->num_ports ||
			    !p_cache_sw->ports[port_num].remote_lid_ho) {
				/*
				 * There is some uncached change on the port.
				 * In general, the reasons might be as follows:
				 *  - switch reset
				 *  - port reset (or port down/up)
				 *  - quick connection location change
				 *  - new link (or new switch)
				 *
				 * First two reasons allow cache usage, while
				 * the last two reasons should invalidate cache.
				 *
				 * In case of quick connection location change,
				 * cache would have been invalidated by
				 * osm_ucast_cache_check_new_link() function.
				 *
				 * In case of new link between two known nodes,
				 * cache also would have been invalidated by
				 * osm_ucast_cache_check_new_link() function.
				 *
				 * Another reason is cached link between two
				 * known switches went back. In this case the
				 * osm_ucast_cache_check_new_link() function would
				 * clear both sides of the link from the cache
				 * during the discovery process, so effectively
				 * this would be equivalent to port reset.
				 *
				 * So three possible reasons remain:
				 *  - switch reset
				 *  - port reset (or port down/up)
				 *  - link of a new switch
				 *
				 * To validate cache, we need to check only the
				 * third reason - link of a new node/switch:
				 *  - If this is the local switch that is new,
				 *    then it should have (p_sw->need_update == 2).
				 *  - If the remote node is switch and it's new,
				 *    then it also should have
				 *    (p_sw->need_update == 2).
				 *  - If the remote node is CA/RTR and it's new,
				 *    then its port should have is_new flag on.
				 */
				if (p_sw->need_update == 2) {
					OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
						"New switch found (lid %u) - "
						"cache is invalid\n", lid_ho);
					osm_ucast_cache_invalidate(p_mgr);
					goto Exit;
				}

				if (remote_node_type == IB_NODE_TYPE_SWITCH) {

					p_remote_sw =
					    p_remote_physp->p_node->sw;
					/*
					 * A switch node without its switch
					 * object cannot be verified; be
					 * conservative and drop the cache
					 * instead of dereferencing NULL.
					 */
					if (!p_remote_sw ||
					    p_remote_sw->need_update == 2) {
						/* this could also be case of
						   switch coming back with an
						   additional link that it
						   didn't have before */
						OSM_LOG(p_mgr->p_log,
							OSM_LOG_INFO,
							"New switch/link found (lid %u) - "
							"cache is invalid\n",
							remote_lid_ho);
						osm_ucast_cache_invalidate
						    (p_mgr);
						goto Exit;
					}
				} else {
					/*
					 * Remote node is CA/RTR.
					 * Get p_port of the remote node and
					 * check its p_port->is_new flag.
					 * A port that cannot be found by guid
					 * is treated like a new one: the
					 * cache is dropped rather than
					 * dereferencing a NULL port.
					 */
					p_remote_port =
					    osm_get_port_by_guid(p_mgr->p_subn,
								 osm_physp_get_port_guid
								 (p_remote_physp));
					if (!p_remote_port ||
					    p_remote_port->is_new) {
						OSM_LOG(p_mgr->p_log,
							OSM_LOG_INFO,
							"New CA/RTR found (lid %u) - "
							"cache is invalid\n",
							remote_lid_ho);
						osm_ucast_cache_invalidate
						    (p_mgr);
						goto Exit;
					}
				}
			} else {
				/*
				 * The change on the port is cached.
				 * In general, the reasons might be as follows:
				 *  - link between two known nodes went back
				 *  - one or more nodes went back, causing all
				 *    the links to reappear
				 *
				 * If it was link that went back, then this case
				 * would have been taken care of during the
				 * discovery by osm_ucast_cache_check_new_link(),
				 * so it's some node that went back.
				 */
				if ((p_cache_sw->ports[port_num].is_leaf &&
				     remote_node_type == IB_NODE_TYPE_SWITCH) ||
				    (!p_cache_sw->ports[port_num].is_leaf &&
				     remote_node_type != IB_NODE_TYPE_SWITCH)) {
					OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
						"Remote node type change on switch lid %u, port %u - "
						"cache is invalid\n",
						lid_ho, port_num);
					osm_ucast_cache_invalidate(p_mgr);
					goto Exit;
				}

				if (p_cache_sw->ports[port_num].remote_lid_ho !=
				    remote_lid_ho) {
					OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
						"Remote lid change on switch lid %u, port %u "
						"(was %u, now %u) - cache is invalid\n",
						lid_ho, port_num,
						p_cache_sw->ports[port_num].
						remote_lid_ho, remote_lid_ho);
					osm_ucast_cache_invalidate(p_mgr);
					goto Exit;
				}

				/*
				 * We don't care who is the node that has
				 * reappeared in the subnet (local or remote).
				 * What's important that the cached link matches
				 * the real fabrics link.
				 * Just clean it from cache.
				 */

				p_cache_sw->ports[port_num].remote_lid_ho = 0;
				p_cache_sw->ports[port_num].is_leaf = FALSE;
				if (p_cache_sw->dropped) {
					/* the whole switch is back - give it
					   its cached routing info */
					__cache_restore_ucast_info(p_mgr,
								   p_cache_sw,
								   p_sw);
					p_cache_sw->dropped = FALSE;
				}

				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Restored link from cache: lid %u, port %u to lid %u\n",
					lid_ho, port_num, remote_lid_ho);
			}
		}
	}

	/* Remove all the cached switches that
	   have all their ports restored */
	__cache_cleanup_switches(p_mgr);

	/*
	 * Done scanning all the physical switch ports in the subnet.
	 * Now we need to check the other side:
	 * Scan all the cached switches and their ports:
	 *  - If the cached switch is missing in the subnet
	 *    (dropped flag is on), check that it's a leaf switch.
	 *    If it's not a leaf, the cache is invalid, because
	 *    cache can tolerate only leaf switch removal.
	 *  - If the cached switch exists in fabric, check all
	 *    its cached ports. These cached ports represent
	 *    missing link in the fabric.
	 *    The missing links that can be tolerated are:
	 *      + link to missing CA/RTR
	 *      + link to missing leaf switch
	 */
	for (p_cache_sw = (cache_switch_t *) cl_qmap_head(&p_mgr->cache_sw_tbl);
	     p_cache_sw != (cache_switch_t *) cl_qmap_end(&p_mgr->cache_sw_tbl);
	     p_cache_sw =
	     (cache_switch_t *) cl_qmap_next(&p_cache_sw->map_item)) {

		if (p_cache_sw->dropped) {
			if (!__cache_sw_is_leaf(p_cache_sw)) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
					"Missing non-leaf switch (lid %u) - "
					"cache is invalid\n",
					__cache_sw_get_base_lid_ho(p_cache_sw));
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"Missing leaf switch (lid %u) - "
				"continuing validation\n",
				__cache_sw_get_base_lid_ho(p_cache_sw));
			continue;
		}

		for (port_num = 1; port_num < p_cache_sw->num_ports; port_num++) {
			if (!p_cache_sw->ports[port_num].remote_lid_ho)
				continue;

			if (p_cache_sw->ports[port_num].is_leaf) {
				CL_ASSERT(__cache_sw_is_leaf(p_cache_sw));
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Switch lid %u, port %u: missing link to CA/RTR - "
					"continuing validation\n",
					__cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				continue;
			}

			p_remote_cache_sw = __cache_get_sw(p_mgr,
							   p_cache_sw->
							   ports[port_num].
							   remote_lid_ho);

			if (!p_remote_cache_sw || !p_remote_cache_sw->dropped) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
					"Switch lid %u, port %u: missing link to existing switch - "
					"cache is invalid\n",
					__cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			if (!__cache_sw_is_leaf(p_remote_cache_sw)) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
					"Switch lid %u, port %u: missing link to non-leaf switch - "
					"cache is invalid\n",
					__cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			/*
			 * At this point we know that the missing link is to
			 * a leaf switch. However, one case deserves a special
			 * treatment. If there was a link between two leaf
			 * switches, then missing leaf switch might break
			 * routing. It is possible that there are routes
			 * that use leaf switches to get from switch to switch
			 * and not just to get to the CAs behind the leaf switch.
			 */
			if (__cache_sw_is_leaf(p_cache_sw) &&
			    __cache_sw_is_leaf(p_remote_cache_sw)) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
					"Switch lid %u, port %u: missing leaf-2-leaf link - "
					"cache is invalid\n",
					__cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"Switch lid %u, port %u: missing remote leaf switch - "
				"continuing validation\n",
				__cache_sw_get_base_lid_ho(p_cache_sw),
				port_num);
		}
	}

	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Unicast cache is valid\n");
	__ucast_cache_dump(p_mgr);
Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* ucast_cache_validate() */
786 
787 /**********************************************************************
788  **********************************************************************/
789 
/*
 * Called for a link discovered between (p_node_1, port_num_1) and
 * (p_node_2, port_num_2).  Invalidates the cache on a link location
 * change or on a CA/RTR-2-CA/RTR link; otherwise, when both sides already
 * have lids, the link is matched against the cache and removed from it
 * (on the switch side, and on the remote side too if that is a switch).
 * Does nothing when the cache is already invalid.
 */
void osm_ucast_cache_check_new_link(osm_ucast_mgr_t * p_mgr,
				    osm_node_t * p_node_1, uint8_t port_num_1,
				    osm_node_t * p_node_2, uint8_t port_num_2)
{
	osm_node_t *p_sw_node;
	osm_node_t *p_peer_node;
	uint8_t sw_port;
	uint8_t peer_port;
	uint16_t sw_lid_ho;
	uint16_t peer_lid_ho;
	boolean_t peer_is_sw;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!p_mgr->cache_valid)
		goto Exit;

	__cache_check_link_change(p_mgr,
				  osm_node_get_physp_ptr(p_node_1, port_num_1),
				  osm_node_get_physp_ptr(p_node_2, port_num_2));

	if (!p_mgr->cache_valid)
		goto Exit;

	/* for code simplicity, work with "switch side" and "peer side" */
	if (osm_node_get_type(p_node_1) == IB_NODE_TYPE_SWITCH) {
		p_sw_node = p_node_1;
		sw_port = port_num_1;
		p_peer_node = p_node_2;
		peer_port = port_num_2;
	} else if (osm_node_get_type(p_node_2) == IB_NODE_TYPE_SWITCH) {
		p_sw_node = p_node_2;
		sw_port = port_num_2;
		p_peer_node = p_node_1;
		peer_port = port_num_1;
	} else {
		OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
			"Found CA/RTR-2-CA/RTR link - cache is invalid\n");
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	peer_is_sw = (osm_node_get_type(p_peer_node) == IB_NODE_TYPE_SWITCH);

	sw_lid_ho = cl_ntoh16(osm_node_get_base_lid(p_sw_node, 0));
	peer_lid_ho = cl_ntoh16(osm_node_get_base_lid(p_peer_node,
						      peer_is_sw ?
						      0 : peer_port));

	if (!sw_lid_ho || !peer_lid_ho) {
		/*
		 * No lid assigned, which means that one of the nodes is new.
		 * Need to wait for lid manager to process this node.
		 * The switches and their links will be checked later when
		 * the whole cache validity will be verified.
		 */
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Link port %u <-> %u reveals new node - cache will "
			"be validated later\n", sw_port, peer_port);
		goto Exit;
	}

	__cache_remove_port(p_mgr, sw_lid_ho, sw_port, peer_lid_ho,
			    !peer_is_sw);

	/* if the peer is a switch, the link should be cleaned from its cache */
	if (peer_is_sw)
		__cache_remove_port(p_mgr, peer_lid_ho,
				    peer_port, sw_lid_ho, FALSE);

Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* osm_ucast_cache_check_new_link() */
861 
862 /**********************************************************************
863  **********************************************************************/
864 
/*
 * Record in the cache that the link between p_physp1 and p_physp2 has
 * been lost.  A switch-2-switch link is cached on both switches, a link
 * to a CA/RTR only on the switch side.  A CA-2-CA link, or a switch node
 * without its switch object, invalidates the cache.  Nothing is cached
 * when port 0 of an involved switch is already gone.
 * Does nothing when the cache is already invalid.
 */
void osm_ucast_cache_add_link(osm_ucast_mgr_t * p_mgr,
			      osm_physp_t * p_physp1, osm_physp_t * p_physp2)
{
	osm_node_t *p_sw_node = p_physp1->p_node;
	osm_node_t *p_peer_node = p_physp2->p_node;
	osm_physp_t *p_sw_physp = p_physp1;
	osm_physp_t *p_peer_physp = p_physp2;
	boolean_t first_is_sw, second_is_sw;
	uint16_t sw_lid_ho, peer_lid_ho;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!p_mgr->cache_valid)
		goto Exit;

	first_is_sw = (osm_node_get_type(p_sw_node) == IB_NODE_TYPE_SWITCH);
	second_is_sw = (osm_node_get_type(p_peer_node) == IB_NODE_TYPE_SWITCH);

	if (!first_is_sw && !second_is_sw) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
			"Dropping CA-2-CA link - cache invalid\n");
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	if ((first_is_sw && !osm_node_get_physp_ptr(p_sw_node, 0)) ||
	    (second_is_sw && !osm_node_get_physp_ptr(p_peer_node, 0))) {
		/* we're caching a link when one of the nodes
		   has already been dropped and cached */
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Port %u <-> port %u: port0 on one of the nodes "
			"has already been dropped and cached\n",
			p_physp1->port_num, p_physp2->port_num);
		goto Exit;
	}

	/* One of the nodes is switch. Just for code
	   simplicity, make sure that it's the "sw" side. */
	if (!first_is_sw) {
		p_sw_physp = p_physp2;
		p_peer_physp = p_physp1;
		p_sw_node = p_sw_physp->p_node;
		p_peer_node = p_peer_physp->p_node;
	}

	if (!p_sw_node->sw) {
		/* something is wrong - we'd better not use cache */
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	sw_lid_ho = cl_ntoh16(osm_node_get_base_lid(p_sw_node, 0));

	if (osm_node_get_type(p_peer_node) != IB_NODE_TYPE_SWITCH) {
		peer_lid_ho = cl_ntoh16(osm_physp_get_base_lid(p_peer_physp));

		/* lost link to CA/RTR - cache only switch side */
		__cache_add_sw_link(p_mgr, p_sw_physp, peer_lid_ho, TRUE);
		goto Exit;
	}

	if (!p_peer_node->sw) {
		/* something is wrong - we'd better not use cache */
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	peer_lid_ho = cl_ntoh16(osm_node_get_base_lid(p_peer_node, 0));

	/* lost switch-2-switch link - cache both sides */
	__cache_add_sw_link(p_mgr, p_sw_physp, peer_lid_ho, FALSE);
	__cache_add_sw_link(p_mgr, p_peer_physp, sw_lid_ho, FALSE);

Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* osm_ucast_cache_add_link() */
939 
940 /**********************************************************************
941  **********************************************************************/
942 
/*
 * Record a dropped node in the unicast cache.
 *
 * p_mgr  - unicast manager that owns the cache
 * p_node - node that is being dropped from the subnet
 *
 * Nothing is done when the cache is not valid.
 *
 * For a switch, every linked port is cached through
 * osm_ucast_cache_add_link(), and then the switch's hop matrices and
 * linear forwarding table are moved (not copied) from p_node->sw into
 * the cached switch entry.  The cache is invalidated when the switch
 * has no switch info, has no cached entry, is not a leaf, or has no
 * hop matrices.
 *
 * For any other node type, each linked port is cached from the remote
 * (switch) side.
 */
void osm_ucast_cache_add_node(osm_ucast_mgr_t * p_mgr, osm_node_t * p_node)
{
	uint16_t lid_ho;
	uint8_t max_ports;
	uint8_t port_num;
	osm_physp_t *p_physp;
	cache_switch_t *p_cache_sw;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!p_mgr->cache_valid)
		goto Exit;

	if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH) {

		lid_ho = cl_ntoh16(osm_node_get_base_lid(p_node, 0));

		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Caching dropped switch lid %u\n", lid_ho);

		if (!p_node->sw) {
			/* something is wrong - forget about cache */
			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
				"ERR AD03: no switch info for node lid %u - "
				"clearing cache\n", lid_ho);
			osm_ucast_cache_invalidate(p_mgr);
			goto Exit;
		}

		/* unlink (add to cache) all the ports of this switch */
		max_ports = osm_node_get_num_physp(p_node);
		for (port_num = 1; port_num < max_ports; port_num++) {

			p_physp = osm_node_get_physp_ptr(p_node, port_num);
			if (!p_physp || !p_physp->p_remote_physp)
				continue;

			osm_ucast_cache_add_link(p_mgr, p_physp,
						 p_physp->p_remote_physp);
		}

		/*
		 * osm_ucast_cache_add_link() may invalidate the cache.
		 * If that cleared the valid flag, there is nothing
		 * left to record for this switch.
		 */
		if (!p_mgr->cache_valid)
			goto Exit;

		/*
		 * All the ports have been dropped (cached).
		 * If one of the ports was connected to CA/RTR,
		 * then the cached switch would be marked as leaf.
		 * If it isn't, then the dropped switch isn't a leaf,
		 * and cache can't handle it.
		 *
		 * The cached entry may be missing altogether (e.g. the
		 * switch had no linked ports, or every link was
		 * skipped), so it has to be checked at run time - an
		 * assertion is not enough, as it may be compiled out.
		 */

		p_cache_sw = __cache_get_sw(p_mgr, lid_ho);

		if (!p_cache_sw || !__cache_sw_is_leaf(p_cache_sw)) {
			OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
				"Dropped non-leaf switch (lid %u) - "
				"cache is invalid\n", lid_ho);
			osm_ucast_cache_invalidate(p_mgr);
			goto Exit;
		}

		p_cache_sw->dropped = TRUE;

		if (!p_node->sw->num_hops || !p_node->sw->hops) {
			OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
				"No LID matrices for switch lid %u - "
				"cache is invalid\n", lid_ho);
			osm_ucast_cache_invalidate(p_mgr);
			goto Exit;
		}

		/* lid matrices: ownership moves to the cached entry */

		p_cache_sw->num_hops = p_node->sw->num_hops;
		p_node->sw->num_hops = 0;
		p_cache_sw->hops = p_node->sw->hops;
		p_node->sw->hops = NULL;

		/* linear forwarding table: ownership moves as well */

		if (p_node->sw->new_lft) {
			/* LFT buffer exists - we use it, because
			   it is more updated than the switch's LFT */
			p_cache_sw->lft = p_node->sw->new_lft;
			p_node->sw->new_lft = NULL;
		} else {
			/* no LFT buffer, so we use the switch's LFT */
			p_cache_sw->lft = p_node->sw->lft;
			p_node->sw->lft = NULL;
		}
		p_cache_sw->max_lid_ho = p_node->sw->max_lid_ho;
	} else {
		/* dropping CA/RTR: add to cache all the ports of this node */
		max_ports = osm_node_get_num_physp(p_node);
		for (port_num = 1; port_num < max_ports; port_num++) {

			p_physp = osm_node_get_physp_ptr(p_node, port_num);
			if (!p_physp || !p_physp->p_remote_physp)
				continue;

			CL_ASSERT(osm_node_get_type
				  (p_physp->p_remote_physp->p_node) ==
				  IB_NODE_TYPE_SWITCH);

			/* the remote (switch) side goes first */
			osm_ucast_cache_add_link(p_mgr,
						 p_physp->p_remote_physp,
						 p_physp);
		}
	}
Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* osm_ucast_cache_add_node() */
1054 
1055 /**********************************************************************
1056  **********************************************************************/
1057 
/*
 * Configure the forwarding tables of all switches from cached routing.
 *
 * p_mgr - unicast manager that owns the cache
 *
 * Returns 1 when cached routing can't be used (the use_ucast_cache
 * option is off, or the cache is not valid after validation), 0 when
 * all the switches in sw_guid_tbl have been handed to
 * osm_ucast_mgr_set_fwd_table(), and IB_INSUFFICIENT_MEMORY when a
 * replacement LFT could not be allocated.  In the latter case the
 * switches visited before the failure have already been processed,
 * and the failing switch is left untouched.
 */
int osm_ucast_cache_process(osm_ucast_mgr_t * p_mgr)
{
	cl_qmap_t *tbl = &p_mgr->p_subn->sw_guid_tbl;
	cl_map_item_t *item;
	osm_switch_t *p_sw;
	void *p_blank_lft;

	if (!p_mgr->p_subn->opt.use_ucast_cache)
		return 1;

	ucast_cache_validate(p_mgr);
	if (!p_mgr->cache_valid)
		return 1;

	OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
		"Configuring switch tables using cached routing\n");

	for (item = cl_qmap_head(tbl); item != cl_qmap_end(tbl);
	     item = cl_qmap_next(item)) {
		p_sw = (osm_switch_t *) item;

		if (p_sw->need_update && !p_sw->new_lft) {
			/* no new routing was recently calculated for this
			   switch, but the LFT needs to be updated anyway */

			/* Allocate before touching the switch, so that
			   a failure doesn't leave it with lft == NULL
			   and the live table parked in new_lft. */
			p_blank_lft = malloc(IB_LID_UCAST_END_HO + 1);
			if (!p_blank_lft)
				return IB_INSUFFICIENT_MEMORY;
			memset(p_blank_lft, OSM_NO_PATH,
			       IB_LID_UCAST_END_HO + 1);

			/* the current table becomes the one to be set,
			   and the blank one takes its place */
			p_sw->new_lft = p_sw->lft;
			p_sw->lft = p_blank_lft;
		}

		osm_ucast_mgr_set_fwd_table(p_mgr, p_sw);
	}

	return 0;
}
1093 
1094 /**********************************************************************
1095  **********************************************************************/
1096