1 /*-
2  * Copyright 2003-2011 Netlogic Microsystems (Netlogic). All rights
3  * reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are
7  * met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in
13  *    the documentation and/or other materials provided with the
14  *    distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY Netlogic Microsystems ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26  * THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * NETLOGIC_BSD */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD: stable/10/sys/mips/nlm/cms.c 233534 2012-03-27 07:47:13Z jchandra $");
32 #include <sys/types.h>
33 #include <sys/systm.h>
34 #include <sys/param.h>
35 #include <sys/lock.h>
36 #include <sys/mutex.h>
37 #include <sys/proc.h>
38 #include <sys/limits.h>
39 #include <sys/bus.h>
40 
41 #include <sys/ktr.h>
42 #include <sys/kernel.h>
43 #include <sys/kthread.h>
44 #include <sys/proc.h>
45 #include <sys/resourcevar.h>
46 #include <sys/sched.h>
47 #include <sys/unistd.h>
48 #include <sys/sysctl.h>
49 #include <sys/malloc.h>
50 
51 #include <machine/reg.h>
52 #include <machine/cpu.h>
53 #include <machine/hwfunc.h>
54 #include <machine/mips_opcode.h>
55 #include <machine/param.h>
56 #include <machine/intr_machdep.h>
57 
58 #include <mips/nlm/hal/mips-extns.h>
59 #include <mips/nlm/hal/haldefs.h>
60 #include <mips/nlm/hal/iomap.h>
61 #include <mips/nlm/hal/cop2.h>
62 #include <mips/nlm/hal/fmn.h>
63 #include <mips/nlm/hal/pic.h>
64 
65 #include <mips/nlm/msgring.h>
66 #include <mips/nlm/interrupt.h>
67 #include <mips/nlm/xlp.h>
68 
69 #define	MSGRNG_NSTATIONS	1024
70 /*
71  * Keep track of our message ring handler threads, each core has a
72  * different message station. Ideally we will need to start a few
73  * message handling threads every core, and wake them up depending on
74  * load
75  */
76 struct msgring_thread {
77 	struct thread	*thread;	/* msgring handler threads */
78 	int	needed;			/* thread needs to wake up */
79 };
80 static struct msgring_thread msgring_threads[XLP_MAX_CORES * XLP_MAX_THREADS];
81 static struct proc *msgring_proc;	/* all threads are under a proc */
82 
83 /*
84  * The device drivers can register a handler for the messages sent
85  * from a station (corresponding to the device).
86  */
87 struct tx_stn_handler {
88 	msgring_handler action;
89 	void *arg;
90 };
91 static struct tx_stn_handler msgmap[MSGRNG_NSTATIONS];
92 static struct mtx	msgmap_lock;
93 uint32_t xlp_msg_thread_mask;
94 static int xlp_msg_threads_per_core = XLP_MAX_THREADS;
95 
96 static void create_msgring_thread(int hwtid);
97 static int msgring_process_fast_intr(void *arg);
98 
99 /* Debug counters */
100 static int msgring_nintr[XLP_MAX_CORES * XLP_MAX_THREADS];
101 static int msgring_wakeup_sleep[XLP_MAX_CORES * XLP_MAX_THREADS];
102 static int msgring_wakeup_nosleep[XLP_MAX_CORES * XLP_MAX_THREADS];
103 static int fmn_msgcount[XLP_MAX_CORES * XLP_MAX_THREADS][4];
104 static int fmn_loops[XLP_MAX_CORES * XLP_MAX_THREADS];
105 
106 /* Whether polled driver implementation */
107 static int polled = 0;
108 
109 /* We do only i/o device credit setup here. CPU credit setup is now
110  * moved to xlp_msgring_cpu_init() so that the credits get setup
111  * only if the CPU exists. xlp_msgring_cpu_init() gets called from
112  * platform_init_ap; and this makes it easy for us to setup CMS
113  * credits for various types of XLP chips, with varying number of
114  * cpu's and cores.
115  */
116 static void
xlp_cms_credit_setup(int credit)117 xlp_cms_credit_setup(int credit)
118 {
119 	uint64_t cmspcibase, cmsbase, pcibase;
120 	uint32_t devoffset;
121 	int dev, fn, maxqid;
122 	int src, qid, i;
123 
124 	for (i = 0; i < XLP_MAX_NODES; i++) {
125 		cmspcibase = nlm_get_cms_pcibase(i);
126 		if (!nlm_dev_exists(XLP_IO_CMS_OFFSET(i)))
127 			continue;
128 		cmsbase = nlm_get_cms_regbase(i);
129 		maxqid = nlm_read_reg(cmspcibase, XLP_PCI_DEVINFO_REG0);
130 		for (dev = 0; dev < 8; dev++) {
131 			for (fn = 0; fn < 8; fn++) {
132 				devoffset = XLP_HDR_OFFSET(i, 0, dev, fn);
133 				if (nlm_dev_exists(devoffset) == 0)
134 					continue;
135 				pcibase = nlm_pcicfg_base(devoffset);
136 				src = nlm_qidstart(pcibase);
137 				if (src == 0)
138 					continue;
139 #if 0 /* Debug */
140 				printf("Setup CMS credits for queues ");
141 				printf("[%d to %d] from src %d\n", 0,
142 				    maxqid, src);
143 #endif
144 				for (qid = 0; qid < maxqid; qid++)
145 					nlm_cms_setup_credits(cmsbase, qid,
146 					    src, credit);
147 			}
148 		}
149 	}
150 }
151 
152 void
xlp_msgring_cpu_init(int node,int cpu,int credit)153 xlp_msgring_cpu_init(int node, int cpu, int credit)
154 {
155 	uint64_t cmspcibase = nlm_get_cms_pcibase(node);
156 	uint64_t cmsbase = nlm_get_cms_regbase(node);
157 	int qid, maxqid, src;
158 
159 	maxqid = nlm_read_reg(cmspcibase, XLP_PCI_DEVINFO_REG0);
160 
161 	/* cpu credit setup is done only from thread-0 of each core */
162 	if((cpu % 4) == 0) {
163 		src = cpu << 2; /* each thread has 4 vc's */
164 		for (qid = 0; qid < maxqid; qid++)
165 			nlm_cms_setup_credits(cmsbase, qid, src, credit);
166 	}
167 }
168 
169 /*
170  * Drain out max_messages for the buckets set in the bucket mask.
171  * Use max_msgs = 0 to drain out all messages.
172  */
173 int
xlp_handle_msg_vc(u_int vcmask,int max_msgs)174 xlp_handle_msg_vc(u_int vcmask, int max_msgs)
175 {
176 	struct nlm_fmn_msg msg;
177 	int srcid = 0, size = 0, code = 0;
178 	struct tx_stn_handler *he;
179 	uint32_t mflags, status;
180 	int n_msgs = 0, vc, m, hwtid;
181 	u_int msgmask;
182 
183 	hwtid = nlm_cpuid();
184 	for (;;) {
185 		/* check if VC empty */
186 		mflags = nlm_save_flags_cop2();
187 		status = nlm_read_c2_msgstatus1();
188 		nlm_restore_flags(mflags);
189 
190 		msgmask = ((status >> 24) & 0xf) ^ 0xf;
191 		msgmask &= vcmask;
192 		if (msgmask == 0)
193 			    break;
194 		m = 0;
195 		for (vc = 0; vc < 4; vc++) {
196 			if ((msgmask & (1 << vc)) == 0)
197 				continue;
198 
199 			mflags = nlm_save_flags_cop2();
200 			status = nlm_fmn_msgrcv(vc, &srcid, &size, &code,
201 		 	    &msg);
202 			nlm_restore_flags(mflags);
203 			if (status != 0) 	/*  no msg or error */
204 				continue;
205 			if (srcid < 0 && srcid >= 1024) {
206 				printf("[%s]: bad src id %d\n", __func__,
207 				    srcid);
208 				continue;
209 			}
210 			he = &msgmap[srcid];
211 			if(he->action != NULL)
212 				(he->action)(vc, size, code, srcid, &msg,
213 				he->arg);
214 #if 0
215 			else
216 				printf("[%s]: No Handler for msg from stn %d,"
217 				    " vc=%d, size=%d, msg0=%jx, droppinge\n",
218 				    __func__, srcid, vc, size,
219 				    (uintmax_t)msg.msg[0]);
220 #endif
221 			fmn_msgcount[hwtid][vc] += 1;
222 			m++;	/* msgs handled in this iter */
223 		}
224 		if (m == 0)
225 			break;	/* nothing done in this iter */
226 		n_msgs += m;
227 		if (max_msgs > 0 && n_msgs >= max_msgs)
228 			break;
229 	}
230 
231 	return (n_msgs);
232 }
233 
234 static void
xlp_discard_msg_vc(u_int vcmask)235 xlp_discard_msg_vc(u_int vcmask)
236 {
237 	struct nlm_fmn_msg msg;
238 	int srcid = 0, size = 0, code = 0, vc;
239 	uint32_t mflags, status;
240 
241 	for (vc = 0; vc < 4; vc++) {
242 		for (;;) {
243 			mflags = nlm_save_flags_cop2();
244 			status = nlm_fmn_msgrcv(vc, &srcid,
245 			    &size, &code, &msg);
246 			nlm_restore_flags(mflags);
247 
248 			/* break if there is no msg or error */
249 			if (status != 0)
250 				break;
251 		}
252 	}
253 }
254 
/*
 * Configure interrupts for the four queues belonging to `cpu' on `node'.
 *
 * The queue ids are cpu * 4 .. cpu * 4 + 3, masked to 7 bits.  For each
 * one the level interrupt is programmed with the given type and
 * watermark, and the timer interrupt is programmed with the fixed
 * arguments 0x1 and 0.
 */
void
xlp_cms_enable_intr(int node, int cpu, int type, int watermark)
{
	uint64_t cms_regs = nlm_get_cms_regbase(node);
	int vc, queue;

	for (vc = 0; vc < 4; vc++) {
		queue = ((cpu * 4) + vc) & 0x7f;
		nlm_cms_per_queue_level_intr(cms_regs, queue, type,
		    watermark);
		nlm_cms_per_queue_timer_intr(cms_regs, queue, 0x1, 0);
	}
}
269 
270 static int
msgring_process_fast_intr(void * arg)271 msgring_process_fast_intr(void *arg)
272 {
273 	struct msgring_thread *mthd;
274 	struct thread *td;
275 	int	cpu;
276 
277 	cpu = nlm_cpuid();
278 	mthd = &msgring_threads[cpu];
279 	msgring_nintr[cpu]++;
280 	td = mthd->thread;
281 
282 	/* clear pending interrupts */
283 	nlm_write_c0_eirr(1ULL << IRQ_MSGRING);
284 
285 	/* wake up the target thread */
286 	mthd->needed = 1;
287 	thread_lock(td);
288 	if (TD_AWAITING_INTR(td)) {
289 		msgring_wakeup_sleep[cpu]++;
290 		TD_CLR_IWAIT(td);
291 		sched_add(td, SRQ_INTR);
292 	} else
293 		msgring_wakeup_nosleep[cpu]++;
294 
295 	thread_unlock(td);
296 
297 	return (FILTER_HANDLED);
298 }
299 
300 static void
msgring_process(void * arg)301 msgring_process(void * arg)
302 {
303 	volatile struct msgring_thread *mthd;
304 	struct thread *td;
305 	uint32_t mflags, msgstatus1;
306 	int hwtid, nmsgs;
307 
308 	hwtid = (intptr_t)arg;
309 	mthd = &msgring_threads[hwtid];
310 	td = mthd->thread;
311 	KASSERT(curthread == td,
312 	    ("%s:msg_ithread and proc linkage out of sync", __func__));
313 
314 	/* First bind this thread to the right CPU */
315 	thread_lock(td);
316 	sched_bind(td, xlp_hwtid_to_cpuid[hwtid]);
317 	thread_unlock(td);
318 
319 	if (hwtid != nlm_cpuid())
320 		printf("Misscheduled hwtid %d != cpuid %d\n", hwtid,
321 		    nlm_cpuid());
322 
323 	xlp_discard_msg_vc(0xf);
324 	xlp_msgring_cpu_init(nlm_nodeid(), nlm_cpuid(), CMS_DEFAULT_CREDIT);
325 	if (polled == 0) {
326 		mflags = nlm_save_flags_cop2();
327 		nlm_fmn_cpu_init(IRQ_MSGRING, 0, 0, 0, 0, 0);
328 		nlm_restore_flags(mflags);
329 		xlp_cms_enable_intr(nlm_nodeid(), nlm_cpuid(), 0x2, 0);
330 		/* clear pending interrupts.
331 		 *  they will get re-raised if still valid */
332 		nlm_write_c0_eirr(1ULL << IRQ_MSGRING);
333 	}
334 
335 	/* start processing messages */
336 	for (;;) {
337 		atomic_store_rel_int(&mthd->needed, 0);
338 		nmsgs = xlp_handle_msg_vc(0xf, 0);
339 
340 		/* sleep */
341 		if (polled == 0) {
342 			/* clear VC-pend bits */
343 			mflags = nlm_save_flags_cop2();
344 			msgstatus1 = nlm_read_c2_msgstatus1();
345 			msgstatus1 |= (0xf << 16);
346 			nlm_write_c2_msgstatus1(msgstatus1);
347 			nlm_restore_flags(mflags);
348 
349 			thread_lock(td);
350 			if (mthd->needed) {
351 				thread_unlock(td);
352 				continue;
353 			}
354 			sched_class(td, PRI_ITHD);
355 			TD_SET_IWAIT(td);
356 			mi_switch(SW_VOL, NULL);
357 			thread_unlock(td);
358 		} else
359 			pause("wmsg", 1);
360 
361 		fmn_loops[hwtid]++;
362 	}
363 }
364 
365 static void
create_msgring_thread(int hwtid)366 create_msgring_thread(int hwtid)
367 {
368 	struct msgring_thread *mthd;
369 	struct thread *td;
370 	int	error;
371 
372 	mthd = &msgring_threads[hwtid];
373 	error = kproc_kthread_add(msgring_process, (void *)(uintptr_t)hwtid,
374 	    &msgring_proc, &td, RFSTOPPED, 2, "msgrngproc",
375 	    "msgthr%d", hwtid);
376 	if (error)
377 		panic("kproc_kthread_add() failed with %d", error);
378 	mthd->thread = td;
379 
380 	thread_lock(td);
381 	sched_class(td, PRI_ITHD);
382 	sched_add(td, SRQ_INTR);
383 	thread_unlock(td);
384 }
385 
386 int
register_msgring_handler(int startb,int endb,msgring_handler action,void * arg)387 register_msgring_handler(int startb, int endb, msgring_handler action,
388     void *arg)
389 {
390 	int	i;
391 
392 	if (bootverbose)
393 		printf("Register handler %d-%d %p(%p)\n",
394 		    startb, endb, action, arg);
395 	KASSERT(startb >= 0 && startb <= endb && endb < MSGRNG_NSTATIONS,
396 	    ("Invalid value for bucket range %d,%d", startb, endb));
397 
398 	mtx_lock_spin(&msgmap_lock);
399 	for (i = startb; i <= endb; i++) {
400 		KASSERT(msgmap[i].action == NULL,
401 		   ("Bucket %d already used [action %p]", i, msgmap[i].action));
402 		msgmap[i].action = action;
403 		msgmap[i].arg = arg;
404 	}
405 	mtx_unlock_spin(&msgmap_lock);
406 	return (0);
407 }
408 
409 /*
410  * Initialize the messaging subsystem.
411  *
412  * Message Stations are shared among all threads in a cpu core, this
413  * has to be called once from every core which is online.
414  */
415 static void
xlp_msgring_config(void * arg)416 xlp_msgring_config(void *arg)
417 {
418 	void *cookie;
419 	unsigned int thrmask, mask;
420 	int i;
421 
422 	/* used polled handler for Ax silion */
423 	if (nlm_is_xlp8xx_ax())
424 		polled = 1;
425 
426 	/* Don't poll on all threads, if polled */
427 	if (polled)
428 		xlp_msg_threads_per_core -= 1;
429 
430 	mtx_init(&msgmap_lock, "msgring", NULL, MTX_SPIN);
431 	if (xlp_threads_per_core < xlp_msg_threads_per_core)
432 		xlp_msg_threads_per_core = xlp_threads_per_core;
433 	thrmask = ((1 << xlp_msg_threads_per_core) - 1);
434 	mask = 0;
435 	for (i = 0; i < XLP_MAX_CORES; i++) {
436 		mask <<= XLP_MAX_THREADS;
437 		mask |= thrmask;
438 	}
439 	xlp_msg_thread_mask = xlp_hw_thread_mask & mask;
440 #if 0
441 	printf("CMS Message handler thread mask %#jx\n",
442 	    (uintmax_t)xlp_msg_thread_mask);
443 #endif
444 	xlp_cms_credit_setup(CMS_DEFAULT_CREDIT);
445 	create_msgring_thread(0);
446 	cpu_establish_hardintr("msgring", msgring_process_fast_intr, NULL,
447 	    NULL, IRQ_MSGRING, INTR_TYPE_NET, &cookie);
448 }
449 
450 /*
451  * Start message ring processing threads on other CPUs, after SMP start
452  */
453 static void
start_msgring_threads(void * arg)454 start_msgring_threads(void *arg)
455 {
456 	int	hwt;
457 
458 	for (hwt = 1; hwt < XLP_MAX_CORES * XLP_MAX_THREADS; hwt++) {
459 		if ((xlp_msg_thread_mask & (1 << hwt)) == 0)
460 			continue;
461 		create_msgring_thread(hwt);
462 	}
463 }
464 
/*
 * Boot-time hooks: xlp_msgring_config() runs at SI_SUB_DRIVERS with
 * SI_ORDER_FIRST, start_msgring_threads() at SI_SUB_SMP with
 * SI_ORDER_MIDDLE.
 */
SYSINIT(xlp_msgring_config,
    SI_SUB_DRIVERS, SI_ORDER_FIRST, xlp_msgring_config, NULL);
SYSINIT(start_msgring_threads,
    SI_SUB_SMP, SI_ORDER_MIDDLE, start_msgring_threads, NULL);
469 
470 /*
471  * DEBUG support, XXX: static buffer, not locked
472  */
473 static int
sys_print_debug(SYSCTL_HANDLER_ARGS)474 sys_print_debug(SYSCTL_HANDLER_ARGS)
475 {
476 	int error, nb, i, fs;
477 	static char xprintb[4096], *buf;
478 
479 	buf = xprintb;
480 	fs = sizeof(xprintb);
481 	nb = snprintf(buf, fs,
482 	    "\nID     vc0       vc1       vc2     vc3     loops\n");
483 	buf += nb;
484 	fs -= nb;
485 	for (i = 0; i < 32; i++) {
486 		if ((xlp_hw_thread_mask & (1 << i)) == 0)
487 			continue;
488 		nb = snprintf(buf, fs,
489 		    "%2d: %8d %8d %8d %8d %8d\n", i,
490 		    fmn_msgcount[i][0], fmn_msgcount[i][1],
491 		    fmn_msgcount[i][2], fmn_msgcount[i][3],
492 		    fmn_loops[i]);
493 		buf += nb;
494 		fs -= nb;
495 	}
496 	error = SYSCTL_OUT(req, xprintb, buf - xprintb);
497 	return (error);
498 }
499 
500 SYSCTL_PROC(_debug, OID_AUTO, msgring, CTLTYPE_STRING | CTLFLAG_RD, 0, 0,
501     sys_print_debug, "A", "msgring debug info");
502