xref: /NextBSD/sys/mips/nlm/hal/fmn.c (revision eb1a5f8de9f7ea602c373a710f531abbf81141c4)
1 /*-
2  * Copyright 2003-2011 Netlogic Microsystems (Netlogic). All rights
3  * reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are
7  * met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in
13  *    the documentation and/or other materials provided with the
14  *    distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY Netlogic Microsystems ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26  * THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * NETLOGIC_BSD */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 #include <sys/types.h>
33 #include <sys/systm.h>
34 
35 #include <machine/cpufunc.h>
36 #include <mips/nlm/hal/mips-extns.h>
37 #include <mips/nlm/hal/haldefs.h>
38 #include <mips/nlm/hal/iomap.h>
39 #include <mips/nlm/hal/fmn.h>
40 
/*
 * XLP can take up to 16K FMN messages per hardware queue as spill.
 * Configuring all 16K, however, makes the total spill memory required
 * blow up to 192MB for a single chip configuration and to 768MB for a
 * four chip configuration. Hence, for now, the per queue spill is set
 * up as 1K FMN messages. With this, the total spill memory needed for
 * 1024 hardware queues (with 12 bytes per single entry FMN message)
 * becomes (1 * 1024) * 12 * 1024 queues = 12MB. For the four chip
 * configuration, the memory needed is 12 * 4 = 48MB.
 */
uint64_t nlm_cms_spill_total_messages = 1024;	/* 1K messages per queue */
51 
/*
 * FMN stations present on an XLP832:
 *
 *   CPU    stations: 8
 *   PCIE0  stations: 1
 *   PCIE1  stations: 1
 *   PCIE2  stations: 1
 *   PCIE3  stations: 1
 *   GDX    stations: 1
 *   CRYPTO stations: 1
 *   RSA    stations: 1
 *   CMP    stations: 1
 *   POE    stations: 1
 *   NAE    stations: 1
 *   ==================
 *   Total          : 18 stations per chip
 *
 * With all 4 nodes populated there are 18 * 4 = 72 FMN stations. The
 * node count is hard-coded here instead of using xlp_num_nodes.
 */
uint32_t nlm_cms_total_stations = 4 /* xlp_num_nodes */ * 18;
70 
71 /**
72  * Takes inputs as node, queue_size and maximum number of queues.
73  * Calculates the base, start & end and returns the same for a
74  * defined qid.
75  *
76  * The output queues are maintained in the internal output buffer
77  * which is an on-chip SRAM structure. For the actual hardware
78  * internal implementation, it is a structure which consists
79  * of eight banks of 4096-entry x message-width SRAMs. The SRAM
80  * implementation is designed to run at 1GHz with a 1-cycle read/write
81  * access. A read/write transaction can be initiated for each bank
82  * every cycle for a total of eight accesses per cycle. Successive
83  * entries of the same output queue are placed in successive banks.
84  * This is done to spread different read & write accesses to same/different
85  * output queue over as many different banks as possible so that they
86  * can be scheduled concurrently. Spreading the accesses to as many banks
87  * as possible to maximize the concurrency internally is important for
88  * achieving the desired peak throughput. This is done by h/w implementation
89  * itself.
90  *
91  * Output queues are allocated from this internal output buffer by
92  * software. The total capacity of the output buffer is 32K-entry.
93  * Each output queue can be sized from 32-entry to 1024-entry in
94  * increments of 32-entry. This is done by specifying a Start & a
95  * End pointer: pointers to the first & last 32-entry chunks allocated
96  * to the output queue.
97  *
98  * To optimize the storage required for 1024 OQ pointers, the upper 5-bits
99  * are shared by the Start & the End pointer. The side-effect of this
100  * optimization is that an OQ can't cross a 1024-entry boundary. Also, the
101  * lower 5-bits don't need to be specified in the Start & the End pointer
102  * as the allocation is in increments of 32-entries.
103  *
104  * Queue occupancy is tracked by a Head & a Tail pointer. Tail pointer
105  * indicates the location to which next entry will be written & Head
106  * pointer indicates the location from which next entry will be read. When
107  * these pointers reach the top of the allocated space (indicated by the
108  * End pointer), they are reset to the bottom of the allocated space
109  * (indicated by the Start pointer).
110  *
111  * Output queue pointer information:
112  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
113  *
114  *   14               10 9              5 4                 0
115  *   ------------------
116  *   | base ptr       |
117  *   ------------------
118  *                       ----------------
119  *                       | start ptr    |
120  *                       ----------------
121  *                       ----------------
122  *                       | end   ptr    |
123  *                       ----------------
124  *                       ------------------------------------
125  *                       |           head ptr               |
126  *                       ------------------------------------
127  *                       ------------------------------------
128  *                       |           tail ptr               |
129  *                       ------------------------------------
130  * Note:
131  * A total of 1024 segments can sit on one software-visible "bank"
132  * of internal SRAM. Each segment contains 32 entries. Also note
133  * that sw-visible "banks" are not the same as the actual internal
134  * 8-bank implementation of hardware. It is an optimization of
135  * internal access.
136  *
137  */
138 
nlm_cms_setup_credits(uint64_t base,int destid,int srcid,int credit)139 void nlm_cms_setup_credits(uint64_t base, int destid, int srcid, int credit)
140 {
141 	uint64_t val;
142 
143 	val = (((uint64_t)credit << 24) | (destid << 12) | (srcid << 0));
144 	nlm_write_cms_reg(base, CMS_OUTPUTQ_CREDIT_CFG, val);
145 
146 }
147 
148 /*
149  * base		- CMS module base address for this node.
150  * qid		- is the output queue id otherwise called as vc id
151  * spill_base   - is the 40-bit physical address of spill memory. Must be
152 		  4KB aligned.
153  * nsegs	- No of segments where a "1" indicates 4KB. Spill size must be
154  *                a multiple of 4KB.
155  */
nlm_cms_alloc_spill_q(uint64_t base,int qid,uint64_t spill_base,int nsegs)156 int nlm_cms_alloc_spill_q(uint64_t base, int qid, uint64_t spill_base,
157 				int nsegs)
158 {
159 	uint64_t queue_config;
160 	uint32_t spill_start;
161 
162 	if (nsegs > CMS_MAX_SPILL_SEGMENTS_PER_QUEUE) {
163 		return 1;
164 	}
165 
166 	queue_config = nlm_read_cms_reg(base,(CMS_OUTPUTQ_CONFIG(qid)));
167 
168 	spill_start = ((spill_base >> 12) & 0x3F);
169 	/* Spill configuration */
170 	queue_config = (((uint64_t)CMS_SPILL_ENA << 62) |
171 				(((spill_base >> 18) & 0x3FFFFF) << 27) |
172 				(spill_start + nsegs - 1) << 21 |
173 				(spill_start << 15));
174 
175 	nlm_write_cms_reg(base,(CMS_OUTPUTQ_CONFIG(qid)),queue_config);
176 
177 	return 0;
178 }
179 
/*
 * Return the current value of the configuration register of output
 * queue qid.
 *
 * base - CMS register base handed to nlm_read_cms_reg().
 * qid  - output queue whose CMS_OUTPUTQ_CONFIG register is read.
 */
uint64_t nlm_cms_get_onchip_queue (uint64_t base, int qid)
{
	uint64_t outq_cfg;

	outq_cfg = nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid));

	return (outq_cfg);
}
184 
/*
 * OR val into the configuration register of output queue qid.
 *
 * This is a read-modify-write: bits that are already set in the register
 * stay set, so this routine can add bits but never clear them.
 *
 * base - CMS register base handed to the register accessors.
 * qid  - output queue whose CMS_OUTPUTQ_CONFIG register is updated.
 * val  - bits to set.
 */
void nlm_cms_set_onchip_queue (uint64_t base, int qid, uint64_t val)
{
	uint64_t outq_cfg = nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid));

	outq_cfg = outq_cfg | val;
	nlm_write_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid), outq_cfg);
}
193 
/*
 * Program the per queue level interrupt fields of output queue qid.
 *
 * The 2-bit field at bit 54 and the 3-bit field at bit 56 of
 * CMS_OUTPUTQ_CONFIG are cleared and then replaced; every other bit is
 * written back as it was read.
 *
 * base     - CMS register base handed to the register accessors.
 * qid      - output queue to configure.
 * sub_type - value for the field at bit 54.
 * intr_val - value for the field at bit 56.
 *
 * Neither value is masked, so anything wider than its field spills into
 * the bits above it.
 */
void nlm_cms_per_queue_level_intr(uint64_t base, int qid, int sub_type,
					int intr_val)
{
	const uint64_t level_fields = (0x7ULL << 56) | (0x3ULL << 54);
	uint64_t outq_cfg;

	outq_cfg = nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid));

	outq_cfg = (outq_cfg & ~level_fields) |
	    ((uint64_t)intr_val << 56) |
	    ((uint64_t)sub_type << 54);

	nlm_write_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid), outq_cfg);
}
208 
/*
 * Program the per queue timer interrupt fields of output queue qid.
 *
 * The 2-bit field at bit 49 and the 3-bit field at bit 51 of
 * CMS_OUTPUTQ_CONFIG are cleared and then replaced; every other bit is
 * written back as it was read.
 *
 * base     - CMS register base handed to the register accessors.
 * qid      - output queue to configure.
 * sub_type - value for the field at bit 49.
 * intr_val - value for the field at bit 51.
 *
 * Neither value is masked, so anything wider than its field spills into
 * the bits above it.
 */
void nlm_cms_per_queue_timer_intr(uint64_t base, int qid, int sub_type,
					int intr_val)
{
	const uint64_t timer_fields = (0x7ULL << 51) | (0x3ULL << 49);
	uint64_t outq_cfg;

	outq_cfg = nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid));

	outq_cfg = (outq_cfg & ~timer_fields) |
	    ((uint64_t)intr_val << 51) |
	    ((uint64_t)sub_type << 49);

	nlm_write_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid), outq_cfg);
}
223 
/*
 * Returns 1 if an interrupt has been generated for output queue qid,
 * 0 otherwise. The answer is bit 59 of the queue's CMS_OUTPUTQ_CONFIG
 * register.
 */
int nlm_cms_outputq_intr_check(uint64_t base, int qid)
{
	const uint64_t intr_pending = 1ULL << 59;
	uint64_t outq_cfg = nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid));

	return ((outq_cfg & intr_pending) != 0);
}
232 
/*
 * Write the configuration register of output queue qid back with bit 59
 * set. Bit 59 is the bit tested by nlm_cms_outputq_intr_check().
 *
 * NOTE(review): presumably the bit is write-one-to-clear in hardware -
 * confirm against the CMS register description.
 */
void nlm_cms_outputq_clr_intr(uint64_t base, int qid)
{
	const uint64_t intr_pending = 1ULL << 59;
	uint64_t outq_cfg = nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid));

	nlm_write_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid),
	    outq_cfg | intr_pending);
}
240 
nlm_cms_illegal_dst_error_intr(uint64_t base,int en)241 void nlm_cms_illegal_dst_error_intr(uint64_t base, int en)
242 {
243 	uint64_t val;
244 
245 	val = nlm_read_cms_reg(base, CMS_MSG_CONFIG);
246 	val |= (en<<8);
247 	nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
248 }
249 
nlm_cms_timeout_error_intr(uint64_t base,int en)250 void nlm_cms_timeout_error_intr(uint64_t base, int en)
251 {
252 	uint64_t val;
253 
254 	val = nlm_read_cms_reg(base, CMS_MSG_CONFIG);
255 	val |= (en<<7);
256 	nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
257 }
258 
nlm_cms_biu_error_resp_intr(uint64_t base,int en)259 void nlm_cms_biu_error_resp_intr(uint64_t base, int en)
260 {
261 	uint64_t val;
262 
263 	val = nlm_read_cms_reg(base, CMS_MSG_CONFIG);
264 	val |= (en<<6);
265 	nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
266 }
267 
nlm_cms_spill_uncorrectable_ecc_error_intr(uint64_t base,int en)268 void nlm_cms_spill_uncorrectable_ecc_error_intr(uint64_t base, int en)
269 {
270 	uint64_t val;
271 
272 	val = nlm_read_cms_reg(base, CMS_MSG_CONFIG);
273 	val |= (en<<5) | (en<<3);
274 	nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
275 }
276 
nlm_cms_spill_correctable_ecc_error_intr(uint64_t base,int en)277 void nlm_cms_spill_correctable_ecc_error_intr(uint64_t base, int en)
278 {
279 	uint64_t val;
280 
281 	val = nlm_read_cms_reg(base, CMS_MSG_CONFIG);
282 	val |= (en<<4) | (en<<2);
283 	nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
284 }
285 
nlm_cms_outputq_uncorrectable_ecc_error_intr(uint64_t base,int en)286 void nlm_cms_outputq_uncorrectable_ecc_error_intr(uint64_t base, int en)
287 {
288 	uint64_t val;
289 
290 	val = nlm_read_cms_reg(base, CMS_MSG_CONFIG);
291 	val |= (en<<1);
292 	nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
293 }
294 
nlm_cms_outputq_correctable_ecc_error_intr(uint64_t base,int en)295 void nlm_cms_outputq_correctable_ecc_error_intr(uint64_t base, int en)
296 {
297 	uint64_t val;
298 
299 	val = nlm_read_cms_reg(base, CMS_MSG_CONFIG);
300 	val |= (en<<0);
301 	nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
302 }
303 
nlm_cms_network_error_status(uint64_t base)304 uint64_t nlm_cms_network_error_status(uint64_t base)
305 {
306 	return nlm_read_cms_reg(base, CMS_MSG_ERR);
307 }
308 
/*
 * Extract the error code, the 4-bit field at bits 12..15, from an error
 * status word.
 */
int nlm_cms_get_net_error_code(uint64_t err)
{
	const uint64_t code_field = 0xfULL << 12;

	return ((int)((err & code_field) >> 12));
}
313 
/*
 * Extract the error syndrome, the 9-bit field at bits 32..40, from an
 * error status word.
 */
int nlm_cms_get_net_error_syndrome(uint64_t err)
{
	const uint64_t syndrome_field = 0x1ffULL << 32;

	return ((int)((err & syndrome_field) >> 32));
}
318 
/*
 * Extract the RAM index, the 15-bit field at bits 44..58, from an error
 * status word.
 */
int nlm_cms_get_net_error_ramindex(uint64_t err)
{
	const uint64_t ramindex_field = 0x7fffULL << 44;

	return ((int)((err & ramindex_field) >> 44));
}
323 
/*
 * Extract the output queue number, the 12-bit field at bits 16..27, from
 * an error status word.
 */
int nlm_cms_get_net_error_outputq(uint64_t err)
{
	const uint64_t outputq_field = 0xfffULL << 16;

	return ((int)((err & outputq_field) >> 16));
}
328 
329 /*========================= FMN Tracing related APIs ================*/
330 
nlm_cms_trace_setup(uint64_t base,int en,uint64_t trace_base,uint64_t trace_limit,int match_dstid_en,int dst_id,int match_srcid_en,int src_id,int wrap)331 void nlm_cms_trace_setup(uint64_t base, int en, uint64_t trace_base,
332 				uint64_t trace_limit, int match_dstid_en,
333 				int dst_id, int match_srcid_en, int src_id,
334 				int wrap)
335 {
336 	uint64_t val;
337 
338 	nlm_write_cms_reg(base, CMS_TRACE_BASE_ADDR, trace_base);
339 	nlm_write_cms_reg(base, CMS_TRACE_LIMIT_ADDR, trace_limit);
340 
341 	val = nlm_read_cms_reg(base, CMS_TRACE_CONFIG);
342 	val |= (((uint64_t)match_dstid_en << 39) |
343 		((dst_id & 0xfff) << 24) |
344 		(match_srcid_en << 23) |
345 		((src_id & 0xfff) << 8) |
346 		(wrap << 1) |
347 		(en << 0));
348 	nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
349 }
350 
nlm_cms_endian_byte_swap(uint64_t base,int en)351 void nlm_cms_endian_byte_swap (uint64_t base, int en)
352 {
353 	nlm_write_cms_reg(base, CMS_MSG_ENDIAN_SWAP, en);
354 }
355