net/npf/npf_handler.c

/*-
 * Copyright (c) 2020 Mindaugas Rasiukevicius <rmind at noxt eu>
 * Copyright (c) 2009-2013 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This material is based upon work partially supported by The
 * NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * NPF packet handler.
 *
 * This is the main entry point to the NPF where packet processing happens.
 * There are some important synchronization rules:
 *
 *        1) Lookups into the connection database and configuration (ruleset,
 *        tables, etc) are protected by Epoch-Based Reclamation (EBR);
 *
 *        2) The code in the critical path (protected by EBR) should generally
 *        not block (that includes adaptive mutex acquisitions);
 *
 *        3) Where blocking is unavoidable, references on the relevant
 *        objects should be acquired atomically, while still in the
 *        critical path.
 */

#ifdef _KERNEL
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_handler.c,v 1.50 2024/07/05 04:34:35 rin Exp $");

#include <sys/types.h>
#include <sys/param.h>

#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <net/if.h>
#include <net/pfil.h>
#include <sys/socketvar.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#endif

#include "npf_impl.h"
#include "npf_conn.h"

/*
 * mbuf helpers which differ between the kernel and standalone builds.
 *
 * => Standalone: m_freem() is redirected to the mbufops 'free' callback.
 *    The macro relies on a variable named 'npf' being in scope at the
 *    point of use.  Whether the callback tolerates a NULL mbuf is not
 *    visible from here, so callers should not pass NULL.
 * => Standalone: m_clear_flag() is a no-op; its arguments are not evaluated.
 * => Kernel: m_clear_flag() clears the bits 'f' in (m)->m_flags.
 *
 * Both variants of m_clear_flag() expand to a single void expression, so
 * the macro behaves as one statement wherever it is used.
 */
#if defined(_NPF_STANDALONE)
#define   m_freem(m)                    npf->mbufops->free(m)
#define   m_clear_flag(m,f)             ((void)0)
#else
#define   m_clear_flag(m,f)   ((void)((m)->m_flags &= ~(f)))
#endif

/*
 * When INET6 is not defined, substitute ip6_reass_packet() with a constant
 * ENOTSUP result.  The arguments are not evaluated.
 */
#if !defined(INET6)
#define ip6_reass_packet(x, y)          ENOTSUP
#endif

/*
 * npf_reassembly: hand an IP fragment over to the IPv4/IPv6 reassembly.
 *
 * => Returns 0 with *mff == false if reassembly is disabled for the
 *    protocol (the packet is left as is), or if the reassembly completed,
 *    in which case the nbuf is re-initialised with the resulting mbuf and
 *    the packet information is cached again.
 * => Returns 0 with *mff == true if no mbuf was handed back, i.e. more
 *    fragments are expected.
 * => Returns the error from the reassembly routine on failure; the mbuf
 *    is freed and the nbuf is zeroed.
 * => Returns EINVAL if the re-cached packet is still flagged as a fragment
 *    or as malformed; the nbuf keeps the mbuf in that case.
 */
static int
npf_reassembly(npf_t *npf, npf_cache_t *npc, bool *mff)
{
          nbuf_t * const nb = npc->npc_nbuf;
          struct mbuf *pkt;
          int ret = EINVAL;

          *mff = false;
          pkt = nbuf_head_mbuf(nb);

          /* Select the reassembly routine, if enabled for the protocol. */
          if (npf_iscached(npc, NPC_IP4) && npf->ip4_reassembly) {
                    ret = ip_reass_packet(&pkt);
          } else if (npf_iscached(npc, NPC_IP6) && npf->ip6_reassembly) {
                    ret = ip6_reass_packet(&pkt, npc->npc_hlen);
          } else {
                    /*
                     * Reassembly is off: the fragment goes through the
                     * ruleset inspection unmodified.
                     */
                    return 0;
          }

          if (ret != 0) {
                    /* Failure: account, drop the mbuf and wipe the nbuf. */
                    npf_stats_inc(npf, NPF_STAT_REASSFAIL);
                    m_freem(pkt);
                    memset(nb, 0, sizeof(*nb));
                    return ret;
          }

          if (pkt == NULL) {
                    /* Nothing handed back yet: waiting for more fragments. */
                    npf_stats_inc(npf, NPF_STAT_FRAGMENTS);
                    *mff = true;
                    return 0;
          }

          /*
           * We have the complete packet.  Point the nbuf at it and redo
           * the caching, since the layer 4 data is accessible now.
           */
          nbuf_init(npf, nb, pkt, nb->nb_ifp);
          npc->npc_info = 0;

          const int info = npf_cache_all(npc);
          if ((info & (NPC_IPFRAG | NPC_FMTERR)) != 0) {
                    return EINVAL;
          }

          npf_stats_inc(npf, NPF_STAT_REASSEMBLY);
          return 0;
}

/*
 * npf_packet_bypass_tag_p: return true if a tag is found on the packet
 * (nbuf_find_tag() returns zero) and it has the NPF_NTAG_PASS bit set.
 */
static inline bool
npf_packet_bypass_tag_p(nbuf_t *nbuf)
{
          uint32_t tag;

          if (nbuf_find_tag(nbuf, &tag) != 0) {
                    /* No tag retrieved: nothing to bypass on. */
                    return false;
          }
          return (tag & NPF_NTAG_PASS) != 0;
}

/*
 * npfk_packet_handler: main packet handling routine for layer 3.
 *
 * Note: packet flow and inspection logic is in strict order.
 *
 * => npf: the NPF instance the packet is processed against.
 * => mp: in/out mbuf pointer.  On entry, *mp is the packet.  It is set
 *    to NULL straight away and is only set again on the exit path.
 * => ifp: the interface; must not be NULL.
 * => di: the direction, handed to the connection lookup, the ruleset
 *    inspection and NAT.
 *
 * => Returns 0 with *mp set to the (possibly replaced) mbuf if the
 *    packet passes.
 * => Returns 0 with *mp == NULL if the fragment was taken for reassembly
 *    and more fragments are expected, or if running the rule procedure
 *    returned false (the mbuf is gone in that case).
 * => Otherwise returns an error number with *mp == NULL: EINVAL for a
 *    malformed packet, ENETUNREACH for a blocked packet, ENOMEM if there
 *    is no mbuf left and no other error, or the error of a failed step.
 */
__dso_public int
npfk_packet_handler(npf_t *npf, struct mbuf **mp, ifnet_t *ifp, int di)
{
          nbuf_t nbuf;
          npf_cache_t npc;
          npf_conn_t *con;
          npf_rule_t *rl;
          npf_rproc_t *rp;
          int error, decision, flags;
          npf_match_info_t mi;
          bool mff;

          KASSERT(ifp != NULL);

          /*
           * Initialize packet information cache.
           * Note: it is enough to clear the info bits.
           */
          nbuf_init(npf, &nbuf, *mp, ifp);
          memset(&npc, 0, sizeof(npf_cache_t));
          npc.npc_ctx = npf;
          npc.npc_nbuf = &nbuf;

          mi.mi_di = di;
          mi.mi_rid = 0;
          mi.mi_retfl = 0;

          /*
           * From here on the nbuf tracks the mbuf; the caller's pointer
           * is restored from it on the exit path.  Block by default.
           */
          *mp = NULL;
          decision = NPF_DECISION_BLOCK;
          error = 0;
          rp = NULL;
          con = NULL;

          /* Cache everything. */
          flags = npf_cache_all(&npc);

          /* Malformed packet, leave quickly. */
          if (flags & NPC_FMTERR) {
                    error = EINVAL;
                    goto out;
          }

          /* Determine whether it is an IP fragment. */
          if (__predict_false(flags & NPC_IPFRAG)) {
                    /* Pass to IPv4/IPv6 reassembly mechanism. */
                    error = npf_reassembly(npf, &npc, &mff);
                    if (error) {
                              goto out;
                    }
                    if (mff) {
                              /*
                               * More fragments should come.  Note: *mp
                               * stays NULL, there is nothing to return.
                               */
                              return 0;
                    }
          }

          /* Just pass-through if specially tagged. */
          if (npf_packet_bypass_tag_p(&nbuf)) {
                    goto pass;
          }

          /* Inspect the list of connections (if found, acquires a reference). */
          con = npf_conn_inspect(&npc, di, &error);

          /* If "passing" connection found - skip the ruleset inspection. */
          if (con && npf_conn_pass(con, &mi, &rp)) {
                    npf_stats_inc(npf, NPF_STAT_PASS_CONN);
                    KASSERT(error == 0);
                    goto pass;
          }
          if (__predict_false(error)) {
                    /*
                     * ENETUNREACH means "block": take the block path, so
                     * that the rule procedure (if any) is still executed.
                     * Any other error leaves directly.
                     */
                    if (error == ENETUNREACH) {
                              goto block;
                    }
                    goto out;
          }

          /* Acquire the lock, inspect the ruleset using this packet. */
          int slock = npf_config_read_enter(npf);
          npf_ruleset_t *rlset = npf_config_ruleset(npf);

          rl = npf_ruleset_inspect(&npc, rlset, di, NPF_LAYER_3);
          if (__predict_false(rl == NULL)) {
                    /* No rule matched: apply the default policy. */
                    const bool pass = npf_default_pass(npf);
                    npf_config_read_exit(npf, slock);

                    if (pass) {
                              npf_stats_inc(npf, NPF_STAT_PASS_DEFAULT);
                              goto pass;
                    }
                    npf_stats_inc(npf, NPF_STAT_BLOCK_DEFAULT);
                    goto block;
          }

          /*
           * Get the rule procedure (acquires a reference) for association
           * with a connection (if any) and execution.
           */
          KASSERT(rp == NULL);
          rp = npf_rule_getrproc(rl);

          /* Conclude with the rule and release the lock. */
          error = npf_rule_conclude(rl, &mi);
          npf_config_read_exit(npf, slock);

          if (error) {
                    npf_stats_inc(npf, NPF_STAT_BLOCK_RULESET);
                    goto block;
          }
          npf_stats_inc(npf, NPF_STAT_PASS_RULESET);

          /*
           * Establish a "pass" connection, if required.  Just proceed if
           * connection creation fails (e.g. due to unsupported protocol).
           */
          if ((mi.mi_retfl & NPF_RULE_STATEFUL) != 0 && !con) {
                    con = npf_conn_establish(&npc, di,
                        (mi.mi_retfl & NPF_RULE_GSTATEFUL) == 0);
                    if (con) {
                              /*
                               * Note: the reference on the rule procedure is
                               * transferred to the connection.  It will be
                               * released on connection destruction.
                               */
                              npf_conn_setpass(con, &mi, rp);
                    }
          }

pass:
          decision = NPF_DECISION_PASS;
          KASSERT(error == 0);

          /*
           * Perform NAT.
           */
          error = npf_do_nat(&npc, con, di);

block:
          /*
           * Execute the rule procedure, if any is associated.
           * It may reverse the decision from pass to block.
           */
          if (rp && !npf_rproc_run(&npc, rp, &mi, &decision)) {
                    if (con) {
                              npf_conn_release(con);
                    }
                    npf_rproc_release(rp);
                    /* mbuf already freed */
                    return 0;
          }

out:
          /*
           * Release the reference on a connection.  Release the reference
           * on a rule procedure only if there was no association.
           */
          if (con) {
                    npf_conn_release(con);
          } else if (rp) {
                    npf_rproc_release(rp);
          }

          /* Get the new mbuf pointer. */
          if ((*mp = nbuf_head_mbuf(&nbuf)) == NULL) {
                    return error ? error : ENOMEM;
          }

          /* Pass the packet if decided and there is no error. */
          if (decision == NPF_DECISION_PASS && !error) {
                    /*
                     * XXX: Disable for now, it will be set accordingly later,
                     * for optimisations (to reduce inspection).
                     */
                    m_clear_flag(*mp, M_CANFASTFWD);
                    return 0;
          }

          /*
           * Block the packet.  ENETUNREACH is used to indicate blocking.
           * Depending on the flags and protocol, return TCP reset (RST) or
           * ICMP destination unreachable.
           *
           * If npf_return_block() returns true, the mbuf pointer is
           * dropped here (presumably the routine took over the mbuf).
           */
          if (mi.mi_retfl && npf_return_block(&npc, mi.mi_retfl)) {
                    *mp = NULL;
          }

          if (!error) {
                    error = ENETUNREACH;
          }

          /*
           * Free the mbuf chain, if we still own one.  Do not rely on
           * m_freem() accepting NULL: in the standalone build it is the
           * mbufops 'free' callback supplied by the application.
           */
          if (*mp != NULL) {
                    m_freem(*mp);
                    *mp = NULL;
          }
          return error;
}