1 /*
2 * Copyright (c) 2005 Cisco Systems. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33 #define LINUXKPI_PARAM_PREFIX mthca_
34
35 #include <linux/jiffies.h>
36 #include <linux/timer.h>
37 #include <linux/workqueue.h>
38
39 #include "mthca_dev.h"
40
/*
 * Catastrophic error type codes.  When an error is reported, the top
 * byte of the first (byte-swapped) word of the catastrophic error
 * buffer is compared against these values to pick the logged message.
 */
enum {
	MTHCA_CATAS_TYPE_INTERNAL = 0,	/* logged as "internal error" */
	MTHCA_CATAS_TYPE_UPLINK   = 3,	/* logged as "uplink bus error" */
	MTHCA_CATAS_TYPE_DDR      = 4,	/* logged as "DDR data error" */
	MTHCA_CATAS_TYPE_PARITY   = 5,	/* logged as "internal parity error" */
};
47
48 #define MTHCA_CATAS_POLL_INTERVAL (5 * HZ)
49
50 static DEFINE_SPINLOCK(catas_lock);
51
52 static LIST_HEAD(catas_list);
53 static struct workqueue_struct *catas_wq;
54 static struct work_struct catas_work;
55
56 static int catas_reset_disable;
57 module_param_named(catas_reset_disable, catas_reset_disable, int, 0644);
58 MODULE_PARM_DESC(catas_reset_disable, "disable reset on catastrophic event if nonzero");
59
catas_reset(struct work_struct * work)60 static void catas_reset(struct work_struct *work)
61 {
62 struct mthca_dev *dev, *tmpdev;
63 LIST_HEAD(tlist);
64 int ret;
65
66 mutex_lock(&mthca_device_mutex);
67
68 spin_lock_irq(&catas_lock);
69 list_splice_init(&catas_list, &tlist);
70 spin_unlock_irq(&catas_lock);
71
72 list_for_each_entry_safe(dev, tmpdev, &tlist, catas_err.list) {
73 struct pci_dev *pdev = dev->pdev;
74 ret = __mthca_restart_one(dev->pdev);
75 /* 'dev' now is not valid */
76 if (ret)
77 printk(KERN_ERR "mthca %s: Reset failed (%d)\n",
78 pci_name(pdev), ret);
79 else {
80 struct mthca_dev *d = pci_get_drvdata(pdev);
81 mthca_dbg(d, "Reset succeeded\n");
82 }
83 }
84
85 mutex_unlock(&mthca_device_mutex);
86 }
87
handle_catas(struct mthca_dev * dev)88 static void handle_catas(struct mthca_dev *dev)
89 {
90 struct ib_event event;
91 unsigned long flags;
92 const char *type;
93 int i;
94
95 event.device = &dev->ib_dev;
96 event.event = IB_EVENT_DEVICE_FATAL;
97 event.element.port_num = 0;
98 dev->active = 0;
99
100 ib_dispatch_event(&event);
101
102 switch (swab32(readl(dev->catas_err.map)) >> 24) {
103 case MTHCA_CATAS_TYPE_INTERNAL:
104 type = "internal error";
105 break;
106 case MTHCA_CATAS_TYPE_UPLINK:
107 type = "uplink bus error";
108 break;
109 case MTHCA_CATAS_TYPE_DDR:
110 type = "DDR data error";
111 break;
112 case MTHCA_CATAS_TYPE_PARITY:
113 type = "internal parity error";
114 break;
115 default:
116 type = "unknown error";
117 break;
118 }
119
120 mthca_err(dev, "Catastrophic error detected: %s\n", type);
121 for (i = 0; i < dev->catas_err.size; ++i)
122 mthca_err(dev, " buf[%02x]: %08x\n",
123 i, swab32(readl(dev->catas_err.map + i)));
124
125 if (catas_reset_disable)
126 return;
127
128 spin_lock_irqsave(&catas_lock, flags);
129 list_add(&dev->catas_err.list, &catas_list);
130 queue_work(catas_wq, &catas_work);
131 spin_unlock_irqrestore(&catas_lock, flags);
132 }
133
poll_catas(unsigned long dev_ptr)134 static void poll_catas(unsigned long dev_ptr)
135 {
136 struct mthca_dev *dev = (struct mthca_dev *) dev_ptr;
137 int i;
138
139 for (i = 0; i < dev->catas_err.size; ++i)
140 if (readl(dev->catas_err.map + i)) {
141 handle_catas(dev);
142 return;
143 }
144
145 mod_timer(&dev->catas_err.timer,
146 round_jiffies(jiffies + MTHCA_CATAS_POLL_INTERVAL));
147 }
148
mthca_start_catas_poll(struct mthca_dev * dev)149 void mthca_start_catas_poll(struct mthca_dev *dev)
150 {
151 unsigned long addr;
152
153 init_timer(&dev->catas_err.timer);
154 dev->catas_err.map = NULL;
155
156 addr = pci_resource_start(dev->pdev, 0) +
157 ((pci_resource_len(dev->pdev, 0) - 1) &
158 dev->catas_err.addr);
159
160 dev->catas_err.map = ioremap(addr, dev->catas_err.size * 4);
161 if (!dev->catas_err.map) {
162 mthca_warn(dev, "couldn't map catastrophic error region "
163 "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4);
164 return;
165 }
166
167 dev->catas_err.timer.data = (unsigned long) dev;
168 dev->catas_err.timer.function = poll_catas;
169 dev->catas_err.timer.expires = jiffies + MTHCA_CATAS_POLL_INTERVAL;
170 INIT_LIST_HEAD(&dev->catas_err.list);
171 add_timer(&dev->catas_err.timer);
172 }
173
mthca_stop_catas_poll(struct mthca_dev * dev)174 void mthca_stop_catas_poll(struct mthca_dev *dev)
175 {
176 del_timer_sync(&dev->catas_err.timer);
177
178 if (dev->catas_err.map)
179 iounmap(dev->catas_err.map);
180
181 spin_lock_irq(&catas_lock);
182 list_del(&dev->catas_err.list);
183 spin_unlock_irq(&catas_lock);
184 }
185
mthca_catas_init(void)186 int __init mthca_catas_init(void)
187 {
188 INIT_WORK(&catas_work, catas_reset);
189
190 catas_wq = create_singlethread_workqueue("mthcacatas");
191 if (!catas_wq)
192 return -ENOMEM;
193
194 return 0;
195 }
196
mthca_catas_cleanup(void)197 void mthca_catas_cleanup(void)
198 {
199 destroy_workqueue(catas_wq);
200 }
201