1 /*
2 * Copyright (c) 2008 Mellanox Technologies LTD. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33
34 /*
35 * Abstract:
36 * Implementation of OpenSM Cached Unicast Routing
37 *
38 * Environment:
39 * Linux User Mode
40 *
41 */
42
43 #if HAVE_CONFIG_H
44 # include <config.h>
45 #endif
46
47 #include <stdlib.h>
48 #include <string.h>
49 #include <ctype.h>
50 #include <errno.h>
51 #include <iba/ib_types.h>
52 #include <complib/cl_qmap.h>
53 #include <complib/cl_pool.h>
54 #include <complib/cl_debug.h>
55 #include <opensm/osm_opensm.h>
56 #include <opensm/osm_ucast_mgr.h>
57 #include <opensm/osm_ucast_cache.h>
58 #include <opensm/osm_switch.h>
59 #include <opensm/osm_node.h>
60 #include <opensm/osm_port.h>
61
/*
 * Presumably a nominal number of ports per switch.
 * NOTE(review): this macro is not referenced anywhere in the visible part
 * of this file - confirm whether it is still needed.
 */
#define CACHE_SW_PORTS 36
63
/*
 * One cached (i.e. currently missing) link of a cached switch.
 * A slot with remote_lid_ho == 0 means "nothing cached on this port".
 */
typedef struct cache_port {
	/* TRUE when the link was cached as leading to a CA/RTR */
	boolean_t is_leaf;
	/* LID (host order) of the node on the other side of the link */
	uint16_t remote_lid_ho;
} cache_port_t;
68
/*
 * Cache entry of a single switch, stored in the manager's cache_sw_tbl
 * map keyed by the switch LID (host order).
 */
typedef struct cache_switch {
	/* map linkage; kept as the first member because map items returned
	   by cl_qmap_*() are cast directly to cache_switch_t pointers */
	cl_map_item_t map_item;
	/* TRUE when the whole switch has been dropped from the subnet */
	boolean_t dropped;
	/* routing data taken over from the switch object when the
	   switch is dropped (see osm_ucast_cache_add_node()) */
	uint16_t max_lid_ho;
	uint16_t num_hops;
	uint8_t **hops;
	uint8_t *lft;
	/* number of slots in ports[], including slot 0 */
	uint8_t num_ports;
	/* ports[0] describes the switch itself (its LID and leaf flag);
	   ports[1..num_ports-1] describe the cached links */
	cache_port_t ports[0];
} cache_switch_t;
79
80 /**********************************************************************
81 **********************************************************************/
82
/*
 * Return the LID (host order) of the cached switch itself.
 * The value lives in the remote_lid_ho field of port slot 0.
 */
static uint16_t __cache_sw_get_base_lid_ho(cache_switch_t * p_sw)
{
	const cache_port_t *p_self = &p_sw->ports[0];

	return p_self->remote_lid_ho;
}
87
88 /**********************************************************************
89 **********************************************************************/
90
__cache_sw_is_leaf(cache_switch_t * p_sw)91 static boolean_t __cache_sw_is_leaf(cache_switch_t * p_sw)
92 {
93 return p_sw->ports[0].is_leaf;
94 }
95
96 /**********************************************************************
97 **********************************************************************/
98
/*
 * Mark the cached switch as a leaf switch by raising the is_leaf
 * field of port slot 0.
 */
static void __cache_sw_set_leaf(cache_switch_t * p_sw)
{
	cache_port_t *p_self = &p_sw->ports[0];

	p_self->is_leaf = TRUE;
}
103
104 /**********************************************************************
105 **********************************************************************/
106
__cache_sw_new(uint16_t lid_ho,unsigned num_ports)107 static cache_switch_t *__cache_sw_new(uint16_t lid_ho, unsigned num_ports)
108 {
109 cache_switch_t *p_cache_sw = malloc(sizeof(cache_switch_t) +
110 num_ports * sizeof(cache_port_t));
111 if (!p_cache_sw)
112 return NULL;
113
114 memset(p_cache_sw, 0,
115 sizeof(*p_cache_sw) + num_ports * sizeof(cache_port_t));
116
117 p_cache_sw->num_ports = num_ports;
118
119 /* port[0] fields represent this switch details - lid and type */
120 p_cache_sw->ports[0].remote_lid_ho = lid_ho;
121 p_cache_sw->ports[0].is_leaf = FALSE;
122
123 return p_cache_sw;
124 }
125
126 /**********************************************************************
127 **********************************************************************/
128
/*
 * Release a cache entry together with the routing data it owns.
 *
 * p_sw may be NULL, in which case nothing is done.
 *
 * The hops table is an array of num_hops pointers that is taken over
 * from the switch object in osm_ucast_cache_add_node(). Each non-NULL
 * element is released before the array itself, so that the per-LID hop
 * vectors are not leaked.
 */
static void __cache_sw_destroy(cache_switch_t * p_sw)
{
	unsigned i;

	if (!p_sw)
		return;

	free(p_sw->lft);

	if (p_sw->hops) {
		for (i = 0; i < p_sw->num_hops; i++)
			free(p_sw->hops[i]);
		free(p_sw->hops);
	}

	free(p_sw);
}
140
141 /**********************************************************************
142 **********************************************************************/
143
__cache_get_sw(osm_ucast_mgr_t * p_mgr,uint16_t lid_ho)144 static cache_switch_t *__cache_get_sw(osm_ucast_mgr_t * p_mgr, uint16_t lid_ho)
145 {
146 cache_switch_t *p_cache_sw = (cache_switch_t *)
147 cl_qmap_get(&p_mgr->cache_sw_tbl, lid_ho);
148 if (p_cache_sw == (cache_switch_t *)
149 cl_qmap_end(&p_mgr->cache_sw_tbl))
150 p_cache_sw = NULL;
151
152 return p_cache_sw;
153 }
154
155 /**********************************************************************
156 **********************************************************************/
/*
 * Record a lost link of a switch port in the cache.
 *
 * p_mgr         - unicast manager that owns the cache
 * p             - physical port of the switch on the local side
 * remote_lid_ho - LID (host order) of the node on the other side
 * is_ca         - TRUE when the remote node is a CA/RTR; the switch is
 *                 then also marked as a leaf switch
 *
 * Nothing is cached when the cache is already invalid, when one of the
 * LIDs is zero, or when the port is port 0. The cache entry of the
 * switch is created on demand. The cache is invalidated if the entry
 * cannot be allocated or if the port number does not fit the entry.
 */
static void __cache_add_sw_link(osm_ucast_mgr_t * p_mgr, osm_physp_t *p,
				uint16_t remote_lid_ho, boolean_t is_ca)
{
	cache_switch_t *p_cache_sw;
	uint16_t lid_ho = cl_ntoh16(osm_node_get_base_lid(p->p_node, 0));

	OSM_LOG_ENTER(p_mgr->p_log);

	/*
	 * A previous call may have invalidated the cache (e.g. when caching
	 * the other side of a switch-2-switch link). Adding an entry now
	 * would leave stale data behind, because invalidation of an already
	 * invalid cache does not walk the table.
	 */
	if (!p_mgr->cache_valid)
		goto Exit;

	if (!lid_ho || !remote_lid_ho || !p->port_num)
		goto Exit;

	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
		"Caching switch port: lid %u [port %u] -> lid %u (%s)\n",
		lid_ho, p->port_num, remote_lid_ho, (is_ca) ? "CA/RTR" : "SW");

	p_cache_sw = __cache_get_sw(p_mgr, lid_ho);
	if (!p_cache_sw) {
		p_cache_sw = __cache_sw_new(lid_ho, p->p_node->sw->num_ports);
		if (!p_cache_sw) {
			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
				"ERR AD01: Out of memory - cache is invalid\n");
			osm_ucast_cache_invalidate(p_mgr);
			goto Exit;
		}
		cl_qmap_insert(&p_mgr->cache_sw_tbl, lid_ho,
			       &p_cache_sw->map_item);
	}

	if (p->port_num >= p_cache_sw->num_ports) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
			"ERR AD02: Wrong switch? - cache is invalid\n");
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	if (is_ca)
		__cache_sw_set_leaf(p_cache_sw);

	if (p_cache_sw->ports[p->port_num].remote_lid_ho == 0) {
		/* cache this link only if it hasn't been already cached */
		p_cache_sw->ports[p->port_num].remote_lid_ho = remote_lid_ho;
		p_cache_sw->ports[p->port_num].is_leaf = is_ca;
	}
Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}
203
204 /**********************************************************************
205 **********************************************************************/
206
/*
 * Drop from the cache every switch entry that has no cached link left
 * on any of its ports 1..num_ports-1. Does nothing when the cache is
 * invalid.
 */
static void __cache_cleanup_switches(osm_ucast_mgr_t * p_mgr)
{
	cl_map_item_t *p_item;
	cl_map_item_t *p_next;
	cache_switch_t *p_cached;
	unsigned i;

	if (!p_mgr->cache_valid)
		return;

	for (p_item = cl_qmap_head(&p_mgr->cache_sw_tbl);
	     p_item != cl_qmap_end(&p_mgr->cache_sw_tbl); p_item = p_next) {
		p_cached = (cache_switch_t *) p_item;
		/* pick the successor now - the entry may be removed below */
		p_next = cl_qmap_next(&p_cached->map_item);

		/* stop at the first port that still has a cached link */
		for (i = 1; i < p_cached->num_ports; i++)
			if (p_cached->ports[i].remote_lid_ho)
				break;

		if (i < p_cached->num_ports)
			continue;

		cl_qmap_remove_item(&p_mgr->cache_sw_tbl, &p_cached->map_item);
		__cache_sw_destroy(p_cached);
	}
}
235
236 /**********************************************************************
237 **********************************************************************/
238
239 static void
__cache_check_link_change(osm_ucast_mgr_t * p_mgr,osm_physp_t * p_physp_1,osm_physp_t * p_physp_2)240 __cache_check_link_change(osm_ucast_mgr_t * p_mgr,
241 osm_physp_t * p_physp_1, osm_physp_t * p_physp_2)
242 {
243 OSM_LOG_ENTER(p_mgr->p_log);
244 CL_ASSERT(p_physp_1 && p_physp_2);
245
246 if (!p_mgr->cache_valid)
247 goto Exit;
248
249 if (!p_physp_1->p_remote_physp && !p_physp_2->p_remote_physp)
250 /* both ports were down - new link */
251 goto Exit;
252
253 /* unicast cache cannot tolerate any link location change */
254
255 if ((p_physp_1->p_remote_physp &&
256 p_physp_1->p_remote_physp->p_remote_physp) ||
257 (p_physp_2->p_remote_physp &&
258 p_physp_2->p_remote_physp->p_remote_physp)) {
259 OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
260 "Link location change discovered - cache is invalid\n");
261 osm_ucast_cache_invalidate(p_mgr);
262 goto Exit;
263 }
264 Exit:
265 OSM_LOG_EXIT(p_mgr->p_log);
266 }
267
268 /**********************************************************************
269 **********************************************************************/
270
/*
 * Remove a link that has reappeared in the subnet from the cache.
 *
 * lid_ho / port_num - switch side of the link (LID in host order)
 * remote_lid_ho     - LID (host order) of the node on the other side
 * is_ca             - TRUE when the remote node is a CA/RTR
 *
 * The link must match the cached one exactly: the switch and the port
 * must be cached, and the cached remote LID and remote node kind must
 * be the same. Any mismatch invalidates the whole cache. Does nothing
 * when the cache is already invalid.
 */
static void __cache_remove_port(osm_ucast_mgr_t * p_mgr, uint16_t lid_ho,
				uint8_t port_num, uint16_t remote_lid_ho,
				boolean_t is_ca)
{
	cache_switch_t *p_cache_sw;
	cache_port_t *p_port;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!p_mgr->cache_valid)
		goto Exit;

	p_cache_sw = __cache_get_sw(p_mgr, lid_ho);
	if (!p_cache_sw) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
			"Found uncached switch/link (lid %u, port %u) - "
			"cache is invalid\n", lid_ho, port_num);
		goto Invalidate;
	}

	if (port_num >= p_cache_sw->num_ports ||
	    !p_cache_sw->ports[port_num].remote_lid_ho) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
			"Found uncached switch link (lid %u, port %u) - "
			"cache is invalid\n", lid_ho, port_num);
		goto Invalidate;
	}

	/* the port number is known to be in range from here on */
	p_port = &p_cache_sw->ports[port_num];

	if (p_port->remote_lid_ho != remote_lid_ho) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
			"Remote lid change on switch lid %u, port %u "
			"(was %u, now %u) - cache is invalid\n",
			lid_ho, port_num, p_port->remote_lid_ho,
			remote_lid_ho);
		goto Invalidate;
	}

	/* cached kind of the remote node must agree with the actual one */
	if (!p_port->is_leaf != !is_ca) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
			"Remote node type change on switch lid %u, port %u - "
			"cache is invalid\n", lid_ho, port_num);
		goto Invalidate;
	}

	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
		"New link from lid %u, port %u to lid %u - "
		"found in cache\n", lid_ho, port_num, remote_lid_ho);

	/* the new link was cached - clean it from the cache */
	p_port->remote_lid_ho = 0;
	p_port->is_leaf = FALSE;
	goto Exit;

Invalidate:
	osm_ucast_cache_invalidate(p_mgr);
Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* __cache_remove_port() */
331
332 /**********************************************************************
333 **********************************************************************/
334
335 static void
__cache_restore_ucast_info(osm_ucast_mgr_t * p_mgr,cache_switch_t * p_cache_sw,osm_switch_t * p_sw)336 __cache_restore_ucast_info(osm_ucast_mgr_t * p_mgr,
337 cache_switch_t * p_cache_sw, osm_switch_t * p_sw)
338 {
339 if (!p_mgr->cache_valid)
340 return;
341
342 /* when seting unicast info, the cached port
343 should have all the required info */
344 CL_ASSERT(p_cache_sw->max_lid_ho && p_cache_sw->lft &&
345 p_cache_sw->num_hops && p_cache_sw->hops);
346
347 p_sw->max_lid_ho = p_cache_sw->max_lid_ho;
348
349 if (p_sw->new_lft)
350 free(p_sw->new_lft);
351 p_sw->new_lft = p_cache_sw->lft;
352 p_cache_sw->lft = NULL;
353
354 p_sw->num_hops = p_cache_sw->num_hops;
355 p_cache_sw->num_hops = 0;
356 if (p_sw->hops)
357 free(p_sw->hops);
358 p_sw->hops = p_cache_sw->hops;
359 p_cache_sw->hops = NULL;
360 }
361
362 /**********************************************************************
363 **********************************************************************/
364
/*
 * Write the contents of the cache - every cached switch and each of
 * its cached links - to the log at debug level. Nothing is produced
 * when debug logging is not active.
 */
static void __ucast_cache_dump(osm_ucast_mgr_t * p_mgr)
{
	cl_map_item_t *p_item;
	cache_switch_t *p_cached;
	cache_port_t *p_port;
	unsigned port_num;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (osm_log_is_active(p_mgr->p_log, OSM_LOG_DEBUG)) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Dumping missing nodes/links as logged by unicast cache:\n");

		p_item = cl_qmap_head(&p_mgr->cache_sw_tbl);
		while (p_item != cl_qmap_end(&p_mgr->cache_sw_tbl)) {
			p_cached = (cache_switch_t *) p_item;

			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"\t Switch lid %u %s%s\n",
				__cache_sw_get_base_lid_ho(p_cached),
				(__cache_sw_is_leaf(p_cached)) ?
				"[leaf switch] " : "",
				(p_cached->dropped) ?
				"[whole switch missing]" : "");

			/* slot 0 describes the switch itself - skip it */
			for (port_num = 1; port_num < p_cached->num_ports;
			     port_num++) {
				p_port = &p_cached->ports[port_num];
				if (p_port->remote_lid_ho == 0)
					continue;

				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"\t     - port %u -> lid %u %s\n",
					port_num, p_port->remote_lid_ho,
					(p_port->is_leaf) ?
					"[remote node is leaf]" : "");
			}

			p_item = cl_qmap_next(&p_cached->map_item);
		}
	}

	OSM_LOG_EXIT(p_mgr->p_log);
}
399
400 /**********************************************************************
401 **********************************************************************/
402
/*
 * Mark the unicast cache as invalid and release all of its entries.
 * Calling it on an already invalid cache only produces the log message.
 */
void osm_ucast_cache_invalidate(osm_ucast_mgr_t * p_mgr)
{
	cl_map_item_t *p_item;
	cache_switch_t *p_doomed;

	OSM_LOG_ENTER(p_mgr->p_log);
	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Invalidating unicast cache\n");

	if (p_mgr->cache_valid) {
		p_mgr->cache_valid = FALSE;

		p_item = cl_qmap_head(&p_mgr->cache_sw_tbl);
		while (p_item != cl_qmap_end(&p_mgr->cache_sw_tbl)) {
			p_doomed = (cache_switch_t *) p_item;
			/* advance before the entry is released */
			p_item = cl_qmap_next(&p_doomed->map_item);
			__cache_sw_destroy(p_doomed);
		}

		/* all entries are gone - reset the map itself */
		cl_qmap_remove_all(&p_mgr->cache_sw_tbl);
	}

	OSM_LOG_EXIT(p_mgr->p_log);
}
427
428 /**********************************************************************
429 **********************************************************************/
430
/*
 * Decide whether the cached unicast routing can still be used for the
 * subnet as it looks after the last discovery.
 *
 * The check is done in two passes:
 *  1. Every linked switch port of the subnet that has its need_update
 *     flag on is compared with the cache. Ports that match a cached
 *     link are removed from the cache (and routing data of a switch
 *     that was cached as dropped is handed back to it). Ports that
 *     reveal a new switch, a new CA/RTR or a changed link invalidate
 *     the cache.
 *  2. Whatever is left in the cache describes nodes/links that are
 *     still missing. Only a missing leaf switch, a missing link to a
 *     CA/RTR, or a missing link from a non-leaf switch to a dropped
 *     leaf switch are tolerated.
 *
 * On return p_mgr->cache_valid tells the verdict; an invalid cache has
 * already been emptied by osm_ucast_cache_invalidate().
 */
static void ucast_cache_validate(osm_ucast_mgr_t * p_mgr)
{
	cache_switch_t *p_cache_sw;
	cache_switch_t *p_remote_cache_sw;
	unsigned port_num;
	unsigned max_ports;
	uint8_t remote_node_type;
	uint16_t lid_ho;
	uint16_t remote_lid_ho;
	osm_switch_t *p_sw;
	osm_switch_t *p_remote_sw;
	osm_node_t *p_node;
	osm_physp_t *p_physp;
	osm_physp_t *p_remote_physp;
	osm_port_t *p_remote_port;
	cl_qmap_t *p_sw_tbl;

	OSM_LOG_ENTER(p_mgr->p_log);
	if (!p_mgr->cache_valid)
		goto Exit;

	/* If there are no switches in the subnet, we are done */
	p_sw_tbl = &p_mgr->p_subn->sw_guid_tbl;
	if (cl_qmap_count(p_sw_tbl) == 0) {
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	/*
	 * Scan all the physical switch ports in the subnet.
	 * If the port need_update flag is on, check whether
	 * it's just some node/port reset or a cached topology
	 * change. Otherwise the cache is invalid.
	 */
	for (p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl);
	     p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl);
	     p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item)) {

		p_node = p_sw->p_node;

		lid_ho = cl_ntoh16(osm_node_get_base_lid(p_node, 0));
		p_cache_sw = __cache_get_sw(p_mgr, lid_ho);

		max_ports = osm_node_get_num_physp(p_node);

		/* skip port 0 */
		for (port_num = 1; port_num < max_ports; port_num++) {

			p_physp = osm_node_get_physp_ptr(p_node, port_num);

			if (!p_physp || !p_physp->p_remote_physp ||
			    !osm_physp_link_exists(p_physp,
						   p_physp->p_remote_physp))
				/* no valid link */
				continue;

			/*
			 * While scanning all the physical ports in the subnet,
			 * mark corresponding leaf switches in the cache.
			 */
			if (p_cache_sw &&
			    !p_cache_sw->dropped &&
			    !__cache_sw_is_leaf(p_cache_sw) &&
			    p_physp->p_remote_physp->p_node &&
			    osm_node_get_type(p_physp->p_remote_physp->
					      p_node) != IB_NODE_TYPE_SWITCH)
				__cache_sw_set_leaf(p_cache_sw);

			if (!p_physp->need_update)
				continue;

			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"Checking switch lid %u, port %u\n",
				lid_ho, port_num);

			p_remote_physp = osm_physp_get_remote(p_physp);
			remote_node_type =
			    osm_node_get_type(p_remote_physp->p_node);

			/* a switch is identified by the LID of its port 0,
			   a CA/RTR by the LID of the linked port */
			if (remote_node_type == IB_NODE_TYPE_SWITCH)
				remote_lid_ho =
				    cl_ntoh16(osm_node_get_base_lid
					      (p_remote_physp->p_node, 0));
			else
				remote_lid_ho =
				    cl_ntoh16(osm_node_get_base_lid
					      (p_remote_physp->p_node,
					       osm_physp_get_port_num
					       (p_remote_physp)));

			if (!p_cache_sw ||
			    port_num >= p_cache_sw->num_ports ||
			    !p_cache_sw->ports[port_num].remote_lid_ho) {
				/*
				 * There is some uncached change on the port.
				 * In general, the reasons might be as follows:
				 *  - switch reset
				 *  - port reset (or port down/up)
				 *  - quick connection location change
				 *  - new link (or new switch)
				 *
				 * First two reasons allow cache usage, while
				 * the last two reasons should invalidate cache.
				 *
				 * In case of quick connection location change,
				 * cache would have been invalidated by
				 * osm_ucast_cache_check_new_link() function.
				 *
				 * In case of new link between two known nodes,
				 * cache also would have been invalidated by
				 * osm_ucast_cache_check_new_link() function.
				 *
				 * Another reason is cached link between two
				 * known switches went back. In this case the
				 * osm_ucast_cache_check_new_link() function would
				 * clear both sides of the link from the cache
				 * during the discovery process, so effectively
				 * this would be equivalent to port reset.
				 *
				 * So three possible reasons remain:
				 *  - switch reset
				 *  - port reset (or port down/up)
				 *  - link of a new switch
				 *
				 * To validate cache, we need to check only the
				 * third reason - link of a new node/switch:
				 *  - If this is the local switch that is new,
				 *    then it should have (p_sw->need_update == 2).
				 *  - If the remote node is switch and it's new,
				 *    then it also should have
				 *    (p_sw->need_update == 2).
				 *  - If the remote node is CA/RTR and it's new,
				 *    then its port should have is_new flag on.
				 */
				if (p_sw->need_update == 2) {
					OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
						"New switch found (lid %u) - "
						"cache is invalid\n", lid_ho);
					osm_ucast_cache_invalidate(p_mgr);
					goto Exit;
				}

				if (remote_node_type == IB_NODE_TYPE_SWITCH) {

					p_remote_sw =
					    p_remote_physp->p_node->sw;
					if (p_remote_sw->need_update == 2) {
						/* this could also be case of
						   switch coming back with an
						   additional link that it
						   didn't have before */
						OSM_LOG(p_mgr->p_log,
							OSM_LOG_INFO,
							"New switch/link found (lid %u) - "
							"cache is invalid\n",
							remote_lid_ho);
						osm_ucast_cache_invalidate
						    (p_mgr);
						goto Exit;
					}
				} else {
					/*
					 * Remote node is CA/RTR.
					 * Get p_port of the remote node and
					 * check its p_port->is_new flag.
					 */
					p_remote_port =
					    osm_get_port_by_guid(p_mgr->p_subn,
								 osm_physp_get_port_guid
								 (p_remote_physp));
					if (!p_remote_port) {
						/* the lookup found no port
						   object for this GUID, so
						   the link cannot be
						   verified - don't trust
						   the cache */
						OSM_LOG(p_mgr->p_log,
							OSM_LOG_ERROR,
							"ERR AD04: No port found for "
							"remote CA/RTR (lid %u) - "
							"cache is invalid\n",
							remote_lid_ho);
						osm_ucast_cache_invalidate
						    (p_mgr);
						goto Exit;
					}
					if (p_remote_port->is_new) {
						OSM_LOG(p_mgr->p_log,
							OSM_LOG_INFO,
							"New CA/RTR found (lid %u) - "
							"cache is invalid\n",
							remote_lid_ho);
						osm_ucast_cache_invalidate
						    (p_mgr);
						goto Exit;
					}
				}
			} else {
				/*
				 * The change on the port is cached.
				 * In general, the reasons might be as follows:
				 *  - link between two known nodes went back
				 *  - one or more nodes went back, causing all
				 *    the links to reappear
				 *
				 * If it was link that went back, then this case
				 * would have been taken care of during the
				 * discovery by osm_ucast_cache_check_new_link(),
				 * so it's some node that went back.
				 */
				if ((p_cache_sw->ports[port_num].is_leaf &&
				     remote_node_type == IB_NODE_TYPE_SWITCH) ||
				    (!p_cache_sw->ports[port_num].is_leaf &&
				     remote_node_type != IB_NODE_TYPE_SWITCH)) {
					OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
						"Remote node type change on switch lid %u, port %u - "
						"cache is invalid\n",
						lid_ho, port_num);
					osm_ucast_cache_invalidate(p_mgr);
					goto Exit;
				}

				if (p_cache_sw->ports[port_num].remote_lid_ho !=
				    remote_lid_ho) {
					OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
						"Remote lid change on switch lid %u, port %u "
						"(was %u, now %u) - cache is invalid\n",
						lid_ho, port_num,
						p_cache_sw->ports[port_num].
						remote_lid_ho, remote_lid_ho);
					osm_ucast_cache_invalidate(p_mgr);
					goto Exit;
				}

				/*
				 * We don't care who is the node that has
				 * reappeared in the subnet (local or remote).
				 * What's important that the cached link matches
				 * the real fabrics link.
				 * Just clean it from cache.
				 */

				p_cache_sw->ports[port_num].remote_lid_ho = 0;
				p_cache_sw->ports[port_num].is_leaf = FALSE;
				if (p_cache_sw->dropped) {
					/* the switch itself is back - give
					   it its routing data back */
					__cache_restore_ucast_info(p_mgr,
								   p_cache_sw,
								   p_sw);
					p_cache_sw->dropped = FALSE;
				}

				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Restored link from cache: lid %u, port %u to lid %u\n",
					lid_ho, port_num, remote_lid_ho);
			}
		}
	}

	/* Remove all the cached switches that
	   have all their ports restored */
	__cache_cleanup_switches(p_mgr);

	/*
	 * Done scanning all the physical switch ports in the subnet.
	 * Now we need to check the other side:
	 * Scan all the cached switches and their ports:
	 *  - If the cached switch is missing in the subnet
	 *    (dropped flag is on), check that it's a leaf switch.
	 *    If it's not a leaf, the cache is invalid, because
	 *    cache can tolerate only leaf switch removal.
	 *  - If the cached switch exists in fabric, check all
	 *    its cached ports. These cached ports represent
	 *    missing link in the fabric.
	 *    The missing links that can be tolerated are:
	 *      + link to missing CA/RTR
	 *      + link to missing leaf switch
	 */
	for (p_cache_sw = (cache_switch_t *) cl_qmap_head(&p_mgr->cache_sw_tbl);
	     p_cache_sw != (cache_switch_t *) cl_qmap_end(&p_mgr->cache_sw_tbl);
	     p_cache_sw =
	     (cache_switch_t *) cl_qmap_next(&p_cache_sw->map_item)) {

		if (p_cache_sw->dropped) {
			if (!__cache_sw_is_leaf(p_cache_sw)) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
					"Missing non-leaf switch (lid %u) - "
					"cache is invalid\n",
					__cache_sw_get_base_lid_ho(p_cache_sw));
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"Missing leaf switch (lid %u) - "
				"continuing validation\n",
				__cache_sw_get_base_lid_ho(p_cache_sw));
			continue;
		}

		for (port_num = 1; port_num < p_cache_sw->num_ports; port_num++) {
			if (!p_cache_sw->ports[port_num].remote_lid_ho)
				continue;

			if (p_cache_sw->ports[port_num].is_leaf) {
				CL_ASSERT(__cache_sw_is_leaf(p_cache_sw));
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Switch lid %u, port %u: missing link to CA/RTR - "
					"continuing validation\n",
					__cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				continue;
			}

			p_remote_cache_sw = __cache_get_sw(p_mgr,
							   p_cache_sw->
							   ports[port_num].
							   remote_lid_ho);

			if (!p_remote_cache_sw || !p_remote_cache_sw->dropped) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
					"Switch lid %u, port %u: missing link to existing switch - "
					"cache is invalid\n",
					__cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			if (!__cache_sw_is_leaf(p_remote_cache_sw)) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
					"Switch lid %u, port %u: missing link to non-leaf switch - "
					"cache is invalid\n",
					__cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			/*
			 * At this point we know that the missing link is to
			 * a leaf switch. However, one case deserves a special
			 * treatment. If there was a link between two leaf
			 * switches, then missing leaf switch might break
			 * routing. It is possible that there are routes
			 * that use leaf switches to get from switch to switch
			 * and not just to get to the CAs behind the leaf switch.
			 */
			if (__cache_sw_is_leaf(p_cache_sw) &&
			    __cache_sw_is_leaf(p_remote_cache_sw)) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
					"Switch lid %u, port %u: missing leaf-2-leaf link - "
					"cache is invalid\n",
					__cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"Switch lid %u, port %u: missing remote leaf switch - "
				"continuing validation\n",
				__cache_sw_get_base_lid_ho(p_cache_sw),
				port_num);
		}
	}

	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Unicast cache is valid\n");
	__ucast_cache_dump(p_mgr);
Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* ucast_cache_validate() */
786
787 /**********************************************************************
788 **********************************************************************/
789
/*
 * Process a link that has just been discovered between port_num_1 of
 * p_node_1 and port_num_2 of p_node_2.
 *
 * The cache is invalidated when the link is a location change of a
 * known link, when it connects two non-switch nodes, or when it does
 * not match what was cached for the switch port(s). A matching cached
 * link is removed from the cache on the switch side(s). When one of the
 * nodes has no LID yet, the decision is left to the later validation of
 * the whole cache. Does nothing when the cache is already invalid.
 */
void osm_ucast_cache_check_new_link(osm_ucast_mgr_t * p_mgr,
				    osm_node_t * p_node_1, uint8_t port_num_1,
				    osm_node_t * p_node_2, uint8_t port_num_2)
{
	uint16_t lid_ho_1;
	uint16_t lid_ho_2;
	uint8_t type_1;
	boolean_t is_ca_2;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!p_mgr->cache_valid)
		goto Exit;

	__cache_check_link_change(p_mgr,
				  osm_node_get_physp_ptr(p_node_1, port_num_1),
				  osm_node_get_physp_ptr(p_node_2, port_num_2));

	/* the check above may have invalidated the cache */
	if (!p_mgr->cache_valid)
		goto Exit;

	type_1 = osm_node_get_type(p_node_1);

	if (type_1 != IB_NODE_TYPE_SWITCH &&
	    osm_node_get_type(p_node_2) != IB_NODE_TYPE_SWITCH) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
			"Found CA/RTR-2-CA/RTR link - cache is invalid\n");
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	/* for code simplicity, we want the first node to be switch */
	if (type_1 != IB_NODE_TYPE_SWITCH) {
		osm_node_t *p_swap_node = p_node_2;
		uint8_t swap_port_num = port_num_2;

		p_node_2 = p_node_1;
		port_num_2 = port_num_1;
		p_node_1 = p_swap_node;
		port_num_1 = swap_port_num;
	}

	is_ca_2 = (osm_node_get_type(p_node_2) != IB_NODE_TYPE_SWITCH);

	/* a switch is identified by the LID of its port 0,
	   a CA/RTR by the LID of the linked port */
	lid_ho_1 = cl_ntoh16(osm_node_get_base_lid(p_node_1, 0));
	lid_ho_2 = cl_ntoh16(osm_node_get_base_lid(p_node_2,
						   is_ca_2 ? port_num_2 : 0));

	if (!lid_ho_1 || !lid_ho_2) {
		/*
		 * No lid assigned, which means that one of the nodes is new.
		 * Need to wait for lid manager to process this node.
		 * The switches and their links will be checked later when
		 * the whole cache validity will be verified.
		 */
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Link port %u <-> %u reveals new node - cache will "
			"be validated later\n", port_num_1, port_num_2);
		goto Exit;
	}

	__cache_remove_port(p_mgr, lid_ho_1, port_num_1, lid_ho_2, is_ca_2);

	/* if node_2 is a switch, the link should be cleaned from its cache */
	if (!is_ca_2)
		__cache_remove_port(p_mgr, lid_ho_2,
				    port_num_2, lid_ho_1, FALSE);

Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* osm_ucast_cache_check_new_link() */
861
862 /**********************************************************************
863 **********************************************************************/
864
/*
 * Cache a link that is being dropped from the subnet.
 *
 * p_physp1 / p_physp2 - the two physical ports of the link
 *
 * A switch-2-switch link is cached on both switches, a link between a
 * switch and a CA/RTR is cached on the switch only. A link between two
 * non-switch nodes, or a switch node without switch info, invalidates
 * the cache. Nothing is cached when port 0 of one of the switches is
 * already gone. Does nothing when the cache is already invalid.
 */
void osm_ucast_cache_add_link(osm_ucast_mgr_t * p_mgr,
			      osm_physp_t * p_physp1, osm_physp_t * p_physp2)
{
	osm_node_t *p_node_1 = p_physp1->p_node, *p_node_2 = p_physp2->p_node;
	uint16_t lid_ho_1, lid_ho_2;
	uint8_t type_1;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!p_mgr->cache_valid)
		goto Exit;

	type_1 = osm_node_get_type(p_node_1);

	if (type_1 != IB_NODE_TYPE_SWITCH &&
	    osm_node_get_type(p_node_2) != IB_NODE_TYPE_SWITCH) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
			"Dropping CA-2-CA link - cache invalid\n");
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	if ((type_1 == IB_NODE_TYPE_SWITCH &&
	     !osm_node_get_physp_ptr(p_node_1, 0)) ||
	    (osm_node_get_type(p_node_2) == IB_NODE_TYPE_SWITCH &&
	     !osm_node_get_physp_ptr(p_node_2, 0))) {
		/* we're caching a link when one of the nodes
		   has already been dropped and cached */
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Port %u <-> port %u: port0 on one of the nodes "
			"has already been dropped and cached\n",
			p_physp1->port_num, p_physp2->port_num);
		goto Exit;
	}

	/* One of the nodes is switch. Just for code
	   simplicity, make sure that it's the first node. */
	if (type_1 != IB_NODE_TYPE_SWITCH) {
		osm_physp_t *p_swap = p_physp2;

		p_physp2 = p_physp1;
		p_physp1 = p_swap;
		p_node_1 = p_physp1->p_node;
		p_node_2 = p_physp2->p_node;
	}

	if (!p_node_1->sw) {
		/* something is wrong - we'd better not use cache */
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	lid_ho_1 = cl_ntoh16(osm_node_get_base_lid(p_node_1, 0));

	if (osm_node_get_type(p_node_2) != IB_NODE_TYPE_SWITCH) {
		lid_ho_2 = cl_ntoh16(osm_physp_get_base_lid(p_physp2));

		/* lost link to CA/RTR - cache only switch side */
		__cache_add_sw_link(p_mgr, p_physp1, lid_ho_2, TRUE);
		goto Exit;
	}

	if (!p_node_2->sw) {
		/* something is wrong - we'd better not use cache */
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	lid_ho_2 = cl_ntoh16(osm_node_get_base_lid(p_node_2, 0));

	/* lost switch-2-switch link - cache both sides */
	__cache_add_sw_link(p_mgr, p_physp1, lid_ho_2, FALSE);
	__cache_add_sw_link(p_mgr, p_physp2, lid_ho_1, FALSE);

Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* osm_ucast_cache_add_link() */
939
940 /**********************************************************************
941 **********************************************************************/
942
/*
 * Cache a node that is being dropped from the subnet.
 *
 * For a switch, every link of the switch is cached, the cache entry of
 * the switch is marked as dropped, and the routing data of the switch
 * (hop table, LFT and max LID) is moved from the switch object into
 * the cache entry. The cache is invalidated when the node has no switch
 * info, when the switch does not turn out to be a cached leaf switch,
 * or when it has no hop table.
 *
 * For a CA/RTR, every link of the node is cached on the switch side.
 *
 * Does nothing when the cache is already invalid.
 */
void osm_ucast_cache_add_node(osm_ucast_mgr_t * p_mgr, osm_node_t * p_node)
{
	uint16_t lid_ho;
	uint8_t max_ports;
	uint8_t port_num;
	osm_physp_t *p_physp;
	cache_switch_t *p_cache_sw;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!p_mgr->cache_valid)
		goto Exit;

	if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH) {

		lid_ho = cl_ntoh16(osm_node_get_base_lid(p_node, 0));

		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Caching dropped switch lid %u\n", lid_ho);

		if (!p_node->sw) {
			/* something is wrong - forget about cache */
			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
				"ERR AD03: no switch info for node lid %u - "
				"clearing cache\n", lid_ho);
			osm_ucast_cache_invalidate(p_mgr);
			goto Exit;
		}

		/* unlink (add to cache) all the ports of this switch */
		max_ports = osm_node_get_num_physp(p_node);
		for (port_num = 1; port_num < max_ports; port_num++) {

			p_physp = osm_node_get_physp_ptr(p_node, port_num);
			if (!p_physp || !p_physp->p_remote_physp)
				continue;

			osm_ucast_cache_add_link(p_mgr, p_physp,
						 p_physp->p_remote_physp);
		}

		/* caching one of the links may have invalidated the cache,
		   which also destroys all the cached switches */
		if (!p_mgr->cache_valid)
			goto Exit;

		/*
		 * All the ports have been dropped (cached).
		 * If one of the ports was connected to CA/RTR,
		 * then the cached switch would be marked as leaf.
		 * If it isn't, then the dropped switch isn't a leaf,
		 * and cache can't handle it.
		 * The same goes for a switch that has no cache entry
		 * at all, because none of its links was cached.
		 */

		p_cache_sw = __cache_get_sw(p_mgr, lid_ho);

		if (!p_cache_sw || !__cache_sw_is_leaf(p_cache_sw)) {
			OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
				"Dropped non-leaf switch (lid %u) - "
				"cache is invalid\n", lid_ho);
			osm_ucast_cache_invalidate(p_mgr);
			goto Exit;
		}

		p_cache_sw->dropped = TRUE;

		if (!p_node->sw->num_hops || !p_node->sw->hops) {
			OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
				"No LID matrices for switch lid %u - "
				"cache is invalid\n", lid_ho);
			osm_ucast_cache_invalidate(p_mgr);
			goto Exit;
		}

		/* lid matrices - ownership moves to the cache entry */

		p_cache_sw->num_hops = p_node->sw->num_hops;
		p_node->sw->num_hops = 0;
		p_cache_sw->hops = p_node->sw->hops;
		p_node->sw->hops = NULL;

		/* linear forwarding table - ownership moves as well */

		if (p_node->sw->new_lft) {
			/* LFT buffer exists - we use it, because
			   it is more updated than the switch's LFT */
			p_cache_sw->lft = p_node->sw->new_lft;
			p_node->sw->new_lft = NULL;
		} else {
			/* no LFT buffer, so we use the switch's LFT */
			p_cache_sw->lft = p_node->sw->lft;
			p_node->sw->lft = NULL;
		}
		p_cache_sw->max_lid_ho = p_node->sw->max_lid_ho;
	} else {
		/* dropping CA/RTR: add to cache all the ports of this node */
		max_ports = osm_node_get_num_physp(p_node);
		for (port_num = 1; port_num < max_ports; port_num++) {

			p_physp = osm_node_get_physp_ptr(p_node, port_num);
			if (!p_physp || !p_physp->p_remote_physp)
				continue;

			CL_ASSERT(osm_node_get_type
				  (p_physp->p_remote_physp->p_node) ==
				  IB_NODE_TYPE_SWITCH);

			/* the switch side goes first */
			osm_ucast_cache_add_link(p_mgr,
						 p_physp->p_remote_physp,
						 p_physp);
		}
	}
Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* osm_ucast_cache_add_node() */
1054
1055 /**********************************************************************
1056 **********************************************************************/
1057
/*
 * Try to configure the switch forwarding tables from the cached
 * routing instead of recalculating it.
 *
 * Returns 0 when the cache was valid and all the switch tables were
 * set, 1 when the cache is disabled by configuration or turned out to
 * be invalid, and IB_INSUFFICIENT_MEMORY when an LFT buffer could not
 * be allocated.
 */
int osm_ucast_cache_process(osm_ucast_mgr_t * p_mgr)
{
	cl_qmap_t *tbl = &p_mgr->p_subn->sw_guid_tbl;
	cl_map_item_t *item;
	osm_switch_t *p_sw;
	uint8_t *p_fresh_lft;

	if (!p_mgr->p_subn->opt.use_ucast_cache)
		return 1;

	ucast_cache_validate(p_mgr);
	if (!p_mgr->cache_valid)
		return 1;

	OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
		"Configuring switch tables using cached routing\n");

	for (item = cl_qmap_head(tbl); item != cl_qmap_end(tbl);
	     item = cl_qmap_next(item)) {
		p_sw = (osm_switch_t *) item;

		if (p_sw->need_update && !p_sw->new_lft) {
			/* no new routing was recently calculated for this
			   switch, but the LFT needs to be updated anyway */

			/* allocate before touching the switch, so that a
			   failure leaves its tables as they were */
			p_fresh_lft = malloc(IB_LID_UCAST_END_HO + 1);
			if (!p_fresh_lft)
				return IB_INSUFFICIENT_MEMORY;
			memset(p_fresh_lft, OSM_NO_PATH,
			       IB_LID_UCAST_END_HO + 1);

			/* the current LFT becomes the one to be sent,
			   the switch's own LFT starts with no paths */
			p_sw->new_lft = p_sw->lft;
			p_sw->lft = p_fresh_lft;
		}

		osm_ucast_mgr_set_fwd_table(p_mgr, p_sw);
	}

	return 0;
}
1093
1094 /**********************************************************************
1095 **********************************************************************/
1096