1 /*-
2  * Copyright (c) 2020 Mindaugas Rasiukevicius <rmind at noxt eu>
3  * Copyright (c) 2009-2013 The NetBSD Foundation, Inc.
4  * All rights reserved.
5  *
6  * This material is based upon work partially supported by The
7  * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 /*
32  * NPF packet handler.
33  *
34  * This is the main entry point to the NPF where packet processing happens.
35  * There are some important synchronization rules:
36  *
37  *        1) Lookups into the connection database and configuration (ruleset,
38  *        tables, etc) are protected by Epoch-Based Reclamation (EBR);
39  *
40  *        2) The code in the critical path (protected by EBR) should generally
41  *        not block (that includes adaptive mutex acquisitions);
42  *
43  *        3) Where it will block, references should be acquired atomically,
44  *        while in the critical path, on the relevant objects.
45  */
46 
47 #ifdef _KERNEL
48 #include <sys/cdefs.h>
49 __KERNEL_RCSID(0, "$NetBSD: npf_handler.c,v 1.50 2024/07/05 04:34:35 rin Exp $");
50 
51 #include <sys/types.h>
52 #include <sys/param.h>
53 
54 #include <sys/mbuf.h>
55 #include <sys/mutex.h>
56 #include <net/if.h>
57 #include <net/pfil.h>
58 #include <sys/socketvar.h>
59 
60 #include <netinet/in_systm.h>
61 #include <netinet/in.h>
62 #include <netinet/ip_var.h>
63 #include <netinet/ip6.h>
64 #include <netinet6/ip6_var.h>
65 #endif
66 
67 #include "npf_impl.h"
68 #include "npf_conn.h"
69 
/*
 * Portability shims for the mbuf operations used in this file.
 *
 * Standalone build: m_freem() is routed through the mbuf operations of
 * the NPF instance (note: the expansion refers to a variable named `npf',
 * which therefore must be in scope at every call site), and
 * m_clear_flag() is a no-op.
 *
 * Otherwise: m_clear_flag() clears the given bits in the mbuf's m_flags.
 *
 * Every expansion is fully parenthesised, so that an invocation behaves
 * as a single expression regardless of the surrounding operators.
 */
#if defined(_NPF_STANDALONE)
#define	m_freem(m)		(npf->mbufops->free(m))
#define	m_clear_flag(m,f)	((void)0)
#else
#define	m_clear_flag(m,f)	((m)->m_flags &= ~(f))
#endif

/* Without INET6 there is no IPv6 reassembly routine: report ENOTSUP. */
#ifndef INET6
#define	ip6_reass_packet(x, y)	(ENOTSUP)
#endif
80 
81 static int
npf_reassembly(npf_t * npf,npf_cache_t * npc,bool * mff)82 npf_reassembly(npf_t *npf, npf_cache_t *npc, bool *mff)
83 {
84           nbuf_t *nbuf = npc->npc_nbuf;
85           int error = EINVAL;
86           struct mbuf *m;
87 
88           *mff = false;
89           m = nbuf_head_mbuf(nbuf);
90 
91           if (npf_iscached(npc, NPC_IP4) && npf->ip4_reassembly) {
92                     error = ip_reass_packet(&m);
93           } else if (npf_iscached(npc, NPC_IP6) && npf->ip6_reassembly) {
94                     error = ip6_reass_packet(&m, npc->npc_hlen);
95           } else {
96                     /*
97                      * Reassembly is disabled: just pass the packet through
98                      * the ruleset for inspection.
99                      */
100                     return 0;
101           }
102 
103           if (error) {
104                     /* Reassembly failed; free the mbuf, clear the nbuf. */
105                     npf_stats_inc(npf, NPF_STAT_REASSFAIL);
106                     m_freem(m);
107                     memset(nbuf, 0, sizeof(nbuf_t));
108                     return error;
109           }
110           if (m == NULL) {
111                     /* More fragments should come. */
112                     npf_stats_inc(npf, NPF_STAT_FRAGMENTS);
113                     *mff = true;
114                     return 0;
115           }
116 
117           /*
118            * Reassembly is complete, we have the final packet.
119            * Cache again, since layer 4 data is accessible now.
120            */
121           nbuf_init(npf, nbuf, m, nbuf->nb_ifp);
122           npc->npc_info = 0;
123 
124           if (npf_cache_all(npc) & (NPC_IPFRAG|NPC_FMTERR)) {
125                     return EINVAL;
126           }
127           npf_stats_inc(npf, NPF_STAT_REASSEMBLY);
128           return 0;
129 }
130 
131 static inline bool
npf_packet_bypass_tag_p(nbuf_t * nbuf)132 npf_packet_bypass_tag_p(nbuf_t *nbuf)
133 {
134           uint32_t ntag;
135           return nbuf_find_tag(nbuf, &ntag) == 0 && (ntag & NPF_NTAG_PASS) != 0;
136 }
137 
138 /*
139  * npfk_packet_handler: main packet handling routine for layer 3.
140  *
141  * Note: packet flow and inspection logic is in strict order.
142  */
143 __dso_public int
npfk_packet_handler(npf_t * npf,struct mbuf ** mp,ifnet_t * ifp,int di)144 npfk_packet_handler(npf_t *npf, struct mbuf **mp, ifnet_t *ifp, int di)
145 {
146           nbuf_t nbuf;
147           npf_cache_t npc;
148           npf_conn_t *con;
149           npf_rule_t *rl;
150           npf_rproc_t *rp;
151           int error, decision, flags;
152           npf_match_info_t mi;
153           bool mff;
154 
155           KASSERT(ifp != NULL);
156 
157           /*
158            * Initialize packet information cache.
159            * Note: it is enough to clear the info bits.
160            */
161           nbuf_init(npf, &nbuf, *mp, ifp);
162           memset(&npc, 0, sizeof(npf_cache_t));
163           npc.npc_ctx = npf;
164           npc.npc_nbuf = &nbuf;
165 
166           mi.mi_di = di;
167           mi.mi_rid = 0;
168           mi.mi_retfl = 0;
169 
170           *mp = NULL;
171           decision = NPF_DECISION_BLOCK;
172           error = 0;
173           rp = NULL;
174           con = NULL;
175 
176           /* Cache everything. */
177           flags = npf_cache_all(&npc);
178 
179           /* Malformed packet, leave quickly. */
180           if (flags & NPC_FMTERR) {
181                     error = EINVAL;
182                     goto out;
183           }
184 
185           /* Determine whether it is an IP fragment. */
186           if (__predict_false(flags & NPC_IPFRAG)) {
187                     /* Pass to IPv4/IPv6 reassembly mechanism. */
188                     error = npf_reassembly(npf, &npc, &mff);
189                     if (error) {
190                               goto out;
191                     }
192                     if (mff) {
193                               /* More fragments should come. */
194                               return 0;
195                     }
196           }
197 
198           /* Just pass-through if specially tagged. */
199           if (npf_packet_bypass_tag_p(&nbuf)) {
200                     goto pass;
201           }
202 
203           /* Inspect the list of connections (if found, acquires a reference). */
204           con = npf_conn_inspect(&npc, di, &error);
205 
206           /* If "passing" connection found - skip the ruleset inspection. */
207           if (con && npf_conn_pass(con, &mi, &rp)) {
208                     npf_stats_inc(npf, NPF_STAT_PASS_CONN);
209                     KASSERT(error == 0);
210                     goto pass;
211           }
212           if (__predict_false(error)) {
213                     if (error == ENETUNREACH)
214                               goto block;
215                     goto out;
216           }
217 
218           /* Acquire the lock, inspect the ruleset using this packet. */
219           int slock = npf_config_read_enter(npf);
220           npf_ruleset_t *rlset = npf_config_ruleset(npf);
221 
222           rl = npf_ruleset_inspect(&npc, rlset, di, NPF_LAYER_3);
223           if (__predict_false(rl == NULL)) {
224                     const bool pass = npf_default_pass(npf);
225                     npf_config_read_exit(npf, slock);
226 
227                     if (pass) {
228                               npf_stats_inc(npf, NPF_STAT_PASS_DEFAULT);
229                               goto pass;
230                     }
231                     npf_stats_inc(npf, NPF_STAT_BLOCK_DEFAULT);
232                     goto block;
233           }
234 
235           /*
236            * Get the rule procedure (acquires a reference) for association
237            * with a connection (if any) and execution.
238            */
239           KASSERT(rp == NULL);
240           rp = npf_rule_getrproc(rl);
241 
242           /* Conclude with the rule and release the lock. */
243           error = npf_rule_conclude(rl, &mi);
244           npf_config_read_exit(npf, slock);
245 
246           if (error) {
247                     npf_stats_inc(npf, NPF_STAT_BLOCK_RULESET);
248                     goto block;
249           }
250           npf_stats_inc(npf, NPF_STAT_PASS_RULESET);
251 
252           /*
253            * Establish a "pass" connection, if required.  Just proceed if
254            * connection creation fails (e.g. due to unsupported protocol).
255            */
256           if ((mi.mi_retfl & NPF_RULE_STATEFUL) != 0 && !con) {
257                     con = npf_conn_establish(&npc, di,
258                         (mi.mi_retfl & NPF_RULE_GSTATEFUL) == 0);
259                     if (con) {
260                               /*
261                                * Note: the reference on the rule procedure is
262                                * transferred to the connection.  It will be
263                                * released on connection destruction.
264                                */
265                               npf_conn_setpass(con, &mi, rp);
266                     }
267           }
268 
269 pass:
270           decision = NPF_DECISION_PASS;
271           KASSERT(error == 0);
272 
273           /*
274            * Perform NAT.
275            */
276           error = npf_do_nat(&npc, con, di);
277 
278 block:
279           /*
280            * Execute the rule procedure, if any is associated.
281            * It may reverse the decision from pass to block.
282            */
283           if (rp && !npf_rproc_run(&npc, rp, &mi, &decision)) {
284                     if (con) {
285                               npf_conn_release(con);
286                     }
287                     npf_rproc_release(rp);
288                     /* mbuf already freed */
289                     return 0;
290           }
291 
292 out:
293           /*
294            * Release the reference on a connection.  Release the reference
295            * on a rule procedure only if there was no association.
296            */
297           if (con) {
298                     npf_conn_release(con);
299           } else if (rp) {
300                     npf_rproc_release(rp);
301           }
302 
303           /* Get the new mbuf pointer. */
304           if ((*mp = nbuf_head_mbuf(&nbuf)) == NULL) {
305                     return error ? error : ENOMEM;
306           }
307 
308           /* Pass the packet if decided and there is no error. */
309           if (decision == NPF_DECISION_PASS && !error) {
310                     /*
311                      * XXX: Disable for now, it will be set accordingly later,
312                      * for optimisations (to reduce inspection).
313                      */
314                     m_clear_flag(*mp, M_CANFASTFWD);
315                     return 0;
316           }
317 
318           /*
319            * Block the packet.  ENETUNREACH is used to indicate blocking.
320            * Depending on the flags and protocol, return TCP reset (RST) or
321            * ICMP destination unreachable.
322            */
323           if (mi.mi_retfl && npf_return_block(&npc, mi.mi_retfl)) {
324                     *mp = NULL;
325           }
326 
327           if (!error) {
328                     error = ENETUNREACH;
329           }
330 
331           /* Free the mbuf chain. */
332           m_freem(*mp);
333           *mp = NULL;
334           return error;
335 }
336