1 /*        $NetBSD: linux_socket.c,v 1.156 2024/10/01 16:41:29 riastradh Exp $   */
2 
3 /*-
4  * Copyright (c) 1995, 1998, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Frank van der Linden and Eric Haszlakiewicz.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Functions in multiarch:
34  *        linux_sys_socketcall                    : linux_socketcall.c
35  */
36 
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: linux_socket.c,v 1.156 2024/10/01 16:41:29 riastradh Exp $");
39 
40 #if defined(_KERNEL_OPT)
41 #include "opt_inet.h"
42 #endif /* defined(_KERNEL_OPT) */
43 
44 #include <sys/param.h>
45 #include <sys/kernel.h>
46 #include <sys/systm.h>
47 #include <sys/buf.h>
48 #include <sys/ioctl.h>
49 #include <sys/tty.h>
50 #include <sys/file.h>
51 #include <sys/filedesc.h>
52 #include <sys/select.h>
53 #include <sys/socket.h>
54 #include <sys/socketvar.h>
55 #include <sys/domain.h>
56 #include <net/if.h>
57 #include <net/if_dl.h>
58 #include <net/if_types.h>
59 #include <netinet/in.h>
60 #include <netinet/tcp.h>
61 #include <sys/mount.h>
62 #include <sys/proc.h>
63 #include <sys/vnode.h>
64 #include <sys/device.h>
65 #include <sys/protosw.h>
66 #include <sys/mbuf.h>
67 #include <sys/syslog.h>
68 #include <sys/exec.h>
69 #include <sys/kauth.h>
70 #include <sys/syscallargs.h>
71 #include <sys/ktrace.h>
72 
73 #include <lib/libkern/libkern.h>
74 
75 #include <netinet/ip6.h>
76 #include <netinet6/ip6_var.h>
77 
78 #include <compat/sys/socket.h>
79 #include <compat/sys/sockio.h>
80 
81 #include <compat/linux/common/linux_types.h>
82 #include <compat/linux/common/linux_util.h>
83 #include <compat/linux/common/linux_signal.h>
84 #include <compat/linux/common/linux_ioctl.h>
85 #include <compat/linux/common/linux_sched.h>
86 #include <compat/linux/common/linux_socket.h>
87 #include <compat/linux/common/linux_fcntl.h>
88 #if !defined(__aarch64__) && !defined(__alpha__) && !defined(__amd64__)
89 #include <compat/linux/common/linux_socketcall.h>
90 #endif
91 #include <compat/linux/common/linux_sockio.h>
92 #include <compat/linux/common/linux_ipc.h>
93 #include <compat/linux/common/linux_sem.h>
94 
95 #include <compat/linux/linux_syscallargs.h>
96 
/*
 * Debug output helper.  The argument is a complete, parenthesised
 * uprintf() argument list, e.g. DPRINTF(("fmt %d\n", x)); it expands to
 * nothing unless DEBUG_LINUX is defined.
 */
#if defined(DEBUG_LINUX)
#define DPRINTF(a)	uprintf a
#else /* !DEBUG_LINUX */
#define DPRINTF(a)
#endif /* DEBUG_LINUX */
102 
103 /*
104  * The calls in this file are entered either via the linux_socketcall()
105  * interface or, on the Alpha, as individual syscalls.  The
106  * linux_socketcall function does any massaging of arguments so that all
107  * the calls in here need not think that they are anything other
108  * than a normal syscall.
109  */
110 
111 static int linux_to_bsd_domain(int);
112 static int bsd_to_linux_domain(int);
113 static int linux_to_bsd_type(int);
114 int linux_to_bsd_sopt_level(int);
115 int linux_to_bsd_so_sockopt(int);
116 int linux_to_bsd_ip_sockopt(int);
117 int linux_to_bsd_ipv6_sockopt(int);
118 int linux_to_bsd_tcp_sockopt(int);
119 int linux_to_bsd_udp_sockopt(int);
120 int linux_getifname(struct lwp *, register_t *, void *);
121 int linux_getifconf(struct lwp *, register_t *, void *);
122 int linux_getifhwaddr(struct lwp *, register_t *, u_int, void *);
123 static int linux_get_sa(struct lwp *, int, struct sockaddr_big *,
124                     const struct osockaddr *, socklen_t);
125 static int linux_sa_put(struct osockaddr *osa);
126 static int linux_to_bsd_msg_flags(int);
127 static int bsd_to_linux_msg_flags(int);
128 static void linux_to_bsd_msghdr(const struct linux_msghdr *, struct msghdr *);
129 static void bsd_to_linux_msghdr(const struct msghdr *, struct linux_msghdr *);
130 
131 static const int linux_to_bsd_domain_[LINUX_AF_MAX] = {
132           AF_UNSPEC,
133           AF_UNIX,
134           AF_INET,
135           AF_CCITT, /* LINUX_AF_AX25 */
136           AF_IPX,
137           AF_APPLETALK,
138           -1,                 /* LINUX_AF_NETROM */
139           -1,                 /* LINUX_AF_BRIDGE */
140           -1,                 /* LINUX_AF_ATMPVC */
141           AF_CCITT, /* LINUX_AF_X25 */
142           AF_INET6,
143           -1,                 /* LINUX_AF_ROSE */
144           AF_DECnet,
145           -1,                 /* LINUX_AF_NETBEUI */
146           -1,                 /* LINUX_AF_SECURITY */
147           pseudo_AF_KEY,
148           AF_ROUTE, /* LINUX_AF_NETLINK */
149           -1,                 /* LINUX_AF_PACKET */
150           -1,                 /* LINUX_AF_ASH */
151           -1,                 /* LINUX_AF_ECONET */
152           -1,                 /* LINUX_AF_ATMSVC */
153           AF_SNA,
154           /* rest up to LINUX_AF_MAX-1 is not allocated */
155           -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
156 };
157 
158 static const int bsd_to_linux_domain_[AF_MAX] = {
159           LINUX_AF_UNSPEC,
160           LINUX_AF_UNIX,
161           LINUX_AF_INET,
162           -1,                 /* AF_IMPLINK */
163           -1,                 /* AF_PUP */
164           -1,                 /* AF_CHAOS */
165           -1,                 /* AF_NS */
166           -1,                 /* AF_ISO */
167           -1,                 /* AF_ECMA */
168           -1,                 /* AF_DATAKIT */
169           LINUX_AF_AX25,      /* AF_CCITT */
170           LINUX_AF_SNA,
171           LINUX_AF_DECnet,
172           -1,                 /* AF_DLI */
173           -1,                 /* AF_LAT */
174           -1,                 /* AF_HYLINK */
175           LINUX_AF_APPLETALK,
176           LINUX_AF_NETLINK,
177           -1,                 /* AF_LINK */
178           -1,                 /* AF_XTP */
179           -1,                 /* AF_COIP */
180           -1,                 /* AF_CNT */
181           -1,                 /* pseudo_AF_RTIP */
182           LINUX_AF_IPX,
183           LINUX_AF_INET6,
184           -1,                 /* pseudo_AF_PIP */
185           -1,                 /* AF_ISDN */
186           -1,                 /* AF_NATM */
187           -1,                 /* AF_ARP */
188           LINUX_pseudo_AF_KEY,
189           -1,                 /* pseudo_AF_HDRCMPLT */
190 };
191 
192 static const struct {
193           int bfl;
194           int lfl;
195 } bsd_to_linux_msg_flags_[] = {
196           {MSG_OOB,           LINUX_MSG_OOB},
197           {MSG_PEEK,                    LINUX_MSG_PEEK},
198           {MSG_DONTROUTE,               LINUX_MSG_DONTROUTE},
199           {MSG_EOR,           LINUX_MSG_EOR},
200           {MSG_TRUNC,                   LINUX_MSG_TRUNC},
201           {MSG_CTRUNC,                  LINUX_MSG_CTRUNC},
202           {MSG_WAITALL,                 LINUX_MSG_WAITALL},
203           {MSG_DONTWAIT,                LINUX_MSG_DONTWAIT},
204           {MSG_BCAST,                   0},                 /* not supported, clear */
205           {MSG_MCAST,                   0},                 /* not supported, clear */
206           {MSG_NOSIGNAL,                LINUX_MSG_NOSIGNAL},
207           {-1, /* not supp */ LINUX_MSG_PROBE},
208           {-1, /* not supp */ LINUX_MSG_FIN},
209           {-1, /* not supp */ LINUX_MSG_SYN},
210           {-1, /* not supp */ LINUX_MSG_CONFIRM},
211           {-1, /* not supp */ LINUX_MSG_RST},
212           {-1, /* not supp */ LINUX_MSG_ERRQUEUE},
213           {-1, /* not supp */ LINUX_MSG_MORE},
214 };
215 
216 /*
217  * Convert between Linux and BSD socket domain values
218  */
219 static int
linux_to_bsd_domain(int ldom)220 linux_to_bsd_domain(int ldom)
221 {
222           if (ldom < 0 || ldom >= LINUX_AF_MAX)
223                     return (-1);
224 
225           return linux_to_bsd_domain_[ldom];
226 }
227 
228 /*
229  * Convert between BSD and Linux socket domain values
230  */
231 static int
bsd_to_linux_domain(int bdom)232 bsd_to_linux_domain(int bdom)
233 {
234           if (bdom < 0 || bdom >= AF_MAX)
235                     return (-1);
236 
237           return bsd_to_linux_domain_[bdom];
238 }
239 
240 static int
linux_to_bsd_type(int ltype)241 linux_to_bsd_type(int ltype)
242 {
243           int type, flags;
244 
245           /* Real types are identical between Linux and NetBSD */
246           type = ltype & LINUX_SOCK_TYPE_MASK;
247 
248           /* But flags are not .. */
249           flags = ltype & ~LINUX_SOCK_TYPE_MASK;
250           if (flags & ~(LINUX_SOCK_CLOEXEC|LINUX_SOCK_NONBLOCK))
251                     return -1;
252 
253           if (flags & LINUX_SOCK_CLOEXEC)
254                     type |= SOCK_CLOEXEC;
255           if (flags & LINUX_SOCK_NONBLOCK)
256                     type |= SOCK_NONBLOCK;
257 
258           return type;
259 }
260 
261 static int
linux_to_bsd_msg_flags(int lflag)262 linux_to_bsd_msg_flags(int lflag)
263 {
264           int i, lfl, bfl;
265           int bflag = 0;
266 
267           if (lflag == 0)
268                     return (0);
269 
270           for(i = 0; i < __arraycount(bsd_to_linux_msg_flags_); i++) {
271                     bfl = bsd_to_linux_msg_flags_[i].bfl;
272                     lfl = bsd_to_linux_msg_flags_[i].lfl;
273 
274                     if (lfl == 0)
275                               continue;
276 
277                     if (lflag & lfl) {
278                               if (bfl < 0)
279                                         return (-1);
280 
281                               bflag |= bfl;
282                     }
283           }
284 
285           return (bflag);
286 }
287 
288 static int
bsd_to_linux_msg_flags(int bflag)289 bsd_to_linux_msg_flags(int bflag)
290 {
291           int i, lfl, bfl;
292           int lflag = 0;
293 
294           if (bflag == 0)
295                     return (0);
296 
297           for(i = 0; i < __arraycount(bsd_to_linux_msg_flags_); i++) {
298                     bfl = bsd_to_linux_msg_flags_[i].bfl;
299                     lfl = bsd_to_linux_msg_flags_[i].lfl;
300 
301                     if (bfl <= 0)
302                               continue;
303 
304                     if (bflag & bfl) {
305                               if (lfl < 0)
306                                         return (-1);
307 
308                               lflag |= lfl;
309                     }
310           }
311 
312           return (lflag);
313 }
314 
315 int
linux_sys_socket(struct lwp * l,const struct linux_sys_socket_args * uap,register_t * retval)316 linux_sys_socket(struct lwp *l, const struct linux_sys_socket_args *uap, register_t *retval)
317 {
318           /* {
319                     syscallarg(int)     domain;
320                     syscallarg(int)     type;
321                     syscallarg(int) protocol;
322           } */
323           struct sys___socket30_args bsa;
324           int error;
325 
326 
327           SCARG(&bsa, protocol) = SCARG(uap, protocol);
328           SCARG(&bsa, domain) = linux_to_bsd_domain(SCARG(uap, domain));
329           if (SCARG(&bsa, domain) == -1)
330                     return EINVAL;
331           SCARG(&bsa, type) = linux_to_bsd_type(SCARG(uap, type));
332           if (SCARG(&bsa, type) == -1)
333                     return EINVAL;
334           /*
335            * Apparently linux uses this to talk to ISDN sockets. If we fail
336            * now programs seems to handle it, but if we don't we are going
337            * to fail when we bind and programs don't handle this well.
338            */
339           if (SCARG(&bsa, domain) == AF_ROUTE && SCARG(&bsa, type) == SOCK_RAW)
340                     return ENOTSUP;
341           error = sys___socket30(l, &bsa, retval);
342 
343 #ifdef INET6
344           /*
345            * Linux AF_INET6 socket has IPV6_V6ONLY setsockopt set to 0 by
346            * default and some apps depend on this. So, set V6ONLY to 0
347            * for Linux apps if the sysctl value is set to 1.
348            */
349           if (!error && ip6_v6only && SCARG(&bsa, domain) == PF_INET6) {
350                     struct socket *so;
351 
352                     if (fd_getsock(*retval, &so) == 0) {
353                               int val = 0;
354 
355                               /* ignore error */
356                               (void)so_setsockopt(l, so, IPPROTO_IPV6, IPV6_V6ONLY,
357                                   &val, sizeof(val));
358 
359                               fd_putfile(*retval);
360                     }
361           }
362 #endif
363 
364           return (error);
365 }
366 
367 int
linux_sys_socketpair(struct lwp * l,const struct linux_sys_socketpair_args * uap,register_t * retval)368 linux_sys_socketpair(struct lwp *l, const struct linux_sys_socketpair_args *uap, register_t *retval)
369 {
370           /* {
371                     syscallarg(int) domain;
372                     syscallarg(int) type;
373                     syscallarg(int) protocol;
374                     syscallarg(int *) rsv;
375           } */
376           struct sys_socketpair_args bsa;
377 
378           SCARG(&bsa, domain) = linux_to_bsd_domain(SCARG(uap, domain));
379           if (SCARG(&bsa, domain) == -1)
380                     return EINVAL;
381           SCARG(&bsa, type) = linux_to_bsd_type(SCARG(uap, type));
382           if (SCARG(&bsa, type) == -1)
383                     return EINVAL;
384           SCARG(&bsa, protocol) = SCARG(uap, protocol);
385           SCARG(&bsa, rsv) = SCARG(uap, rsv);
386 
387           return sys_socketpair(l, &bsa, retval);
388 }
389 
390 int
linux_sys_sendto(struct lwp * l,const struct linux_sys_sendto_args * uap,register_t * retval)391 linux_sys_sendto(struct lwp *l, const struct linux_sys_sendto_args *uap, register_t *retval)
392 {
393           /* {
394                     syscallarg(int)                                   s;
395                     syscallarg(void *)                      msg;
396                     syscallarg(int)                                   len;
397                     syscallarg(int)                                   flags;
398                     syscallarg(struct osockaddr *)                    to;
399                     syscallarg(int)                                   tolen;
400           } */
401           struct msghdr   msg;
402           struct iovec    aiov;
403           struct sockaddr_big nam;
404           struct mbuf *m;
405           int bflags;
406           int error;
407 
408           /* Translate message flags.  */
409           bflags = linux_to_bsd_msg_flags(SCARG(uap, flags));
410           if (bflags < 0)
411                     /* Some supported flag */
412                     return EINVAL;
413 
414           msg.msg_flags = 0;
415           msg.msg_name = NULL;
416           msg.msg_control = NULL;
417 
418           if (SCARG(uap, tolen)) {
419                     /* Read in and convert the sockaddr */
420                     error = linux_get_sa(l, SCARG(uap, s), &nam, SCARG(uap, to),
421                         SCARG(uap, tolen));
422                     if (error)
423                               return error;
424                     error = sockargs(&m, &nam, nam.sb_len, UIO_SYSSPACE, MT_SONAME);
425                     if (error)
426                               return error;
427                     msg.msg_flags |= MSG_NAMEMBUF;
428                     msg.msg_name = m;
429                     msg.msg_namelen = nam.sb_len;
430           }
431 
432           msg.msg_iov = &aiov;
433           msg.msg_iovlen = 1;
434           aiov.iov_base = __UNCONST(SCARG(uap, msg));
435           aiov.iov_len = SCARG(uap, len);
436 
437           return do_sys_sendmsg(l, SCARG(uap, s), &msg, bflags, retval);
438 }
439 
440 static void
linux_to_bsd_msghdr(const struct linux_msghdr * lmsg,struct msghdr * bmsg)441 linux_to_bsd_msghdr(const struct linux_msghdr *lmsg, struct msghdr *bmsg)
442 {
443           memset(bmsg, 0, sizeof(*bmsg));
444           bmsg->msg_name = lmsg->msg_name;
445           bmsg->msg_namelen = lmsg->msg_namelen;
446           bmsg->msg_iov = lmsg->msg_iov;
447           bmsg->msg_iovlen = lmsg->msg_iovlen;
448           bmsg->msg_control = lmsg->msg_control;
449           bmsg->msg_controllen = lmsg->msg_controllen;
450           bmsg->msg_flags = lmsg->msg_flags;
451 }
452 
453 static void
bsd_to_linux_msghdr(const struct msghdr * bmsg,struct linux_msghdr * lmsg)454 bsd_to_linux_msghdr(const struct msghdr *bmsg, struct linux_msghdr *lmsg)
455 {
456           memset(lmsg, 0, sizeof(*lmsg));
457           lmsg->msg_name = bmsg->msg_name;
458           lmsg->msg_namelen = bmsg->msg_namelen;
459           lmsg->msg_iov = bmsg->msg_iov;
460           lmsg->msg_iovlen = bmsg->msg_iovlen;
461           lmsg->msg_control = bmsg->msg_control;
462           lmsg->msg_controllen = bmsg->msg_controllen;
463           lmsg->msg_flags = bmsg->msg_flags;
464 }
465 
466 int
linux_sys_sendmsg(struct lwp * l,const struct linux_sys_sendmsg_args * uap,register_t * retval)467 linux_sys_sendmsg(struct lwp *l, const struct linux_sys_sendmsg_args *uap, register_t *retval)
468 {
469           /* {
470                     syscallarg(int) s;
471                     syscallarg(struct linux_msghdr *) msg;
472                     syscallarg(u_int) flags;
473           } */
474           struct msghdr       msg;
475           struct linux_msghdr lmsg;
476           int                 error;
477           int                 bflags;
478           struct sockaddr_big nam;
479           u_int8_t  *control;
480           struct mbuf     *ctl_mbuf = NULL;
481 
482           error = copyin(SCARG(uap, msg), &lmsg, sizeof(lmsg));
483           if (error)
484                     return error;
485           linux_to_bsd_msghdr(&lmsg, &msg);
486 
487           msg.msg_flags = MSG_IOVUSRSPACE;
488 
489           /*
490            * Translate message flags.
491            */
492           bflags = linux_to_bsd_msg_flags(SCARG(uap, flags));
493           if (bflags < 0)
494                     /* Some supported flag */
495                     return EINVAL;
496 
497           if (lmsg.msg_name) {
498                     /* Read in and convert the sockaddr */
499                     error = linux_get_sa(l, SCARG(uap, s), &nam, msg.msg_name,
500                         msg.msg_namelen);
501                     if (error)
502                               return (error);
503                     msg.msg_name = &nam;
504           }
505 
506           /*
507            * Handle cmsg if there is any.
508            */
509           if (LINUX_CMSG_FIRSTHDR(&lmsg)) {
510                     struct linux_cmsghdr l_cmsg, *l_cc;
511                     struct cmsghdr *cmsg;
512                     ssize_t resid = msg.msg_controllen;
513                     size_t clen, cidx = 0, cspace;
514 
515                     ctl_mbuf = m_get(M_WAIT, MT_CONTROL);
516                     clen = MLEN;
517                     control = mtod(ctl_mbuf, void *);
518 
519                     l_cc = LINUX_CMSG_FIRSTHDR(&lmsg);
520                     do {
521                               error = copyin(l_cc, &l_cmsg, sizeof(l_cmsg));
522                               if (error)
523                                         goto done;
524 
525                               /*
526                                * Sanity check the control message length.
527                                */
528                               if (l_cmsg.cmsg_len > resid
529                                   || l_cmsg.cmsg_len < sizeof l_cmsg) {
530                                         error = EINVAL;
531                                         goto done;
532                               }
533 
534                               /*
535                                * Refuse unsupported control messages, and
536                                * translate fields as appropriate.
537                                */
538                               switch (l_cmsg.cmsg_level) {
539                               case LINUX_SOL_SOCKET:
540                                         /* It only differs on some archs */
541                                         if (LINUX_SOL_SOCKET != SOL_SOCKET)
542                                                   l_cmsg.cmsg_level = SOL_SOCKET;
543 
544                                         switch(l_cmsg.cmsg_type) {
545                                         case LINUX_SCM_RIGHTS:
546                                                   /* Linux SCM_RIGHTS is same as NetBSD */
547                                                   break;
548 
549                                         case LINUX_SCM_CREDENTIALS:
550                                                   /* no native equivalent, just drop it */
551                                                   if (control != mtod(ctl_mbuf, void *))
552                                                             free(control, M_MBUF);
553                                                   m_free(ctl_mbuf);
554                                                   ctl_mbuf = NULL;
555                                                   msg.msg_control = NULL;
556                                                   msg.msg_controllen = 0;
557                                                   goto skipcmsg;
558 
559                                         default:
560                                                   /* other types not supported */
561                                                   error = EINVAL;
562                                                   goto done;
563                                         }
564                                         break;
565                               default:
566                                         /* pray and leave intact */
567                                         break;
568                               }
569 
570                               cspace = CMSG_SPACE(l_cmsg.cmsg_len - sizeof(l_cmsg));
571 
572                               /* Check the buffer is big enough */
573                               if (__predict_false(cidx + cspace > clen)) {
574                                         u_int8_t *nc;
575                                         size_t nclen;
576 
577                                         nclen = cidx + cspace;
578                                         if (nclen >= PAGE_SIZE) {
579                                                   error = EINVAL;
580                                                   goto done;
581                                         }
582                                         nc = realloc(clen <= MLEN ? NULL : control,
583                                                             nclen, M_TEMP, M_WAITOK);
584                                         if (!nc) {
585                                                   error = ENOMEM;
586                                                   goto done;
587                                         }
588                                         if (cidx <= MLEN)
589                                                   /* Old buffer was in mbuf... */
590                                                   memcpy(nc, control, cidx);
591                                         control = nc;
592                                         clen = nclen;
593                               }
594 
595                               /* Copy header */
596                               cmsg = (void *)&control[cidx];
597                               cmsg->cmsg_len = l_cmsg.cmsg_len + LINUX_CMSG_ALIGN_DELTA;
598                               cmsg->cmsg_level = l_cmsg.cmsg_level;
599                               cmsg->cmsg_type = l_cmsg.cmsg_type;
600 
601                               /* Zero area between header and data */
602                               memset(cmsg + 1, 0,
603                                         CMSG_ALIGN(sizeof(*cmsg)) - sizeof(*cmsg));
604 
605                               /* Copyin the data */
606                               error = copyin(LINUX_CMSG_DATA(l_cc),
607                                         CMSG_DATA(cmsg),
608                                         l_cmsg.cmsg_len - sizeof(l_cmsg));
609                               if (error)
610                                         goto done;
611 
612                               resid -= LINUX_CMSG_ALIGN(l_cmsg.cmsg_len);
613                               cidx += cspace;
614                     } while ((l_cc = LINUX_CMSG_NXTHDR(&msg, l_cc, &l_cmsg)) && resid > 0);
615 
616                     /* If we allocated a buffer, attach to mbuf */
617                     if (cidx > MLEN) {
618                               MEXTADD(ctl_mbuf, control, clen, M_MBUF, NULL, NULL);
619                               ctl_mbuf->m_flags |= M_EXT_RW;
620                     }
621                     control = NULL;
622                     ctl_mbuf->m_len = cidx;
623 
624                     msg.msg_control = ctl_mbuf;
625                     msg.msg_flags |= MSG_CONTROLMBUF;
626 
627                     ktrkuser("mbcontrol", mtod(ctl_mbuf, void *),
628                         msg.msg_controllen);
629           }
630 
631 skipcmsg:
632           error = do_sys_sendmsg(l, SCARG(uap, s), &msg, bflags, retval);
633           /* Freed internally */
634           ctl_mbuf = NULL;
635 
636 done:
637           if (ctl_mbuf != NULL) {
638                     if (control != NULL && control != mtod(ctl_mbuf, void *))
639                               free(control, M_MBUF);
640                     m_free(ctl_mbuf);
641           }
642           return (error);
643 }
644 
645 int
linux_sys_recvfrom(struct lwp * l,const struct linux_sys_recvfrom_args * uap,register_t * retval)646 linux_sys_recvfrom(struct lwp *l, const struct linux_sys_recvfrom_args *uap, register_t *retval)
647 {
648           /* {
649                     syscallarg(int) s;
650                     syscallarg(void *) buf;
651                     syscallarg(int) len;
652                     syscallarg(int) flags;
653                     syscallarg(struct osockaddr *) from;
654                     syscallarg(int *) fromlenaddr;
655           } */
656           int                 error;
657           struct sys_recvfrom_args bra;
658 
659           SCARG(&bra, s) = SCARG(uap, s);
660           SCARG(&bra, buf) = SCARG(uap, buf);
661           SCARG(&bra, len) = SCARG(uap, len);
662           SCARG(&bra, flags) = SCARG(uap, flags);
663           SCARG(&bra, from) = (struct sockaddr *) SCARG(uap, from);
664           SCARG(&bra, fromlenaddr) = (socklen_t *)SCARG(uap, fromlenaddr);
665 
666           if ((error = sys_recvfrom(l, &bra, retval)))
667                     return (error);
668 
669           if (SCARG(uap, from) && (error = linux_sa_put(SCARG(uap, from))))
670                     return (error);
671 
672           return (0);
673 }
674 
675 static int
linux_copyout_msg_control(struct lwp * l,struct msghdr * mp,struct mbuf * control)676 linux_copyout_msg_control(struct lwp *l, struct msghdr *mp, struct mbuf *control)
677 {
678           int dlen, error = 0;
679           struct cmsghdr *cmsg;
680           struct linux_cmsghdr linux_cmsg;
681           struct mbuf *m;
682           char *q, *q_end;
683 
684           if (mp->msg_controllen <= 0 || control == 0) {
685                     mp->msg_controllen = 0;
686                     free_control_mbuf(l, control, control);
687                     return 0;
688           }
689 
690           ktrkuser("msgcontrol", mtod(control, void *), mp->msg_controllen);
691 
692           q = (char *)mp->msg_control;
693           q_end = q + mp->msg_controllen;
694 
695           for (m = control; m != NULL; ) {
696                     cmsg = mtod(m, struct cmsghdr *);
697 
698                     /*
699                      * Fixup cmsg. We handle two things:
700                      * 0. different sizeof cmsg_len.
701                      * 1. different values for level/type on some archs
702                      * 2. different alignment of CMSG_DATA on some archs
703                      */
704                     memset(&linux_cmsg, 0, sizeof(linux_cmsg));
705                     linux_cmsg.cmsg_len = cmsg->cmsg_len - LINUX_CMSG_ALIGN_DELTA;
706                     linux_cmsg.cmsg_level = cmsg->cmsg_level;
707                     linux_cmsg.cmsg_type = cmsg->cmsg_type;
708 
709                     dlen = q_end - q;
710                     if (linux_cmsg.cmsg_len > dlen) {
711                               /* Not enough room for the parameter */
712                               dlen -= sizeof linux_cmsg;
713                               if (dlen <= 0)
714                                         /* Discard if header wont fit */
715                                         break;
716                               mp->msg_flags |= MSG_CTRUNC;
717                               if (linux_cmsg.cmsg_level == SOL_SOCKET
718                                   && linux_cmsg.cmsg_type == SCM_RIGHTS)
719                                         /* Do not truncate me ... */
720                                         break;
721                     } else
722                               dlen = linux_cmsg.cmsg_len - sizeof linux_cmsg;
723 
724                     switch (linux_cmsg.cmsg_level) {
725                     case SOL_SOCKET:
726                               linux_cmsg.cmsg_level = LINUX_SOL_SOCKET;
727                               switch (linux_cmsg.cmsg_type) {
728                               case SCM_RIGHTS:
729                                         /* Linux SCM_RIGHTS is same as NetBSD */
730                                         break;
731 
732                               default:
733                                         /* other types not supported */
734                                         error = EINVAL;
735                                         goto done;
736                               }
737                               /* machine dependent ! */
738                               break;
739                     default:
740                               /* pray and leave intact */
741                               break;
742                     }
743 
744                     /* There can be padding between the header and data... */
745                     error = copyout(&linux_cmsg, q, sizeof linux_cmsg);
746                     if (error != 0) {
747                               error = copyout(CCMSG_DATA(cmsg), q + sizeof linux_cmsg,
748                                   dlen);
749                     }
750                     if (error != 0) {
751                               /* We must free all the SCM_RIGHTS */
752                               m = control;
753                               break;
754                     }
755                     m = m->m_next;
756                     if (m == NULL || q + LINUX_CMSG_SPACE(dlen) > q_end) {
757                               q += LINUX_CMSG_LEN(dlen);
758                               break;
759                     }
760                     q += LINUX_CMSG_SPACE(dlen);
761           }
762 
763   done:
764           free_control_mbuf(l, control, m);
765 
766           mp->msg_controllen = q - (char *)mp->msg_control;
767           return error;
768 }
769 
770 int
linux_sys_recvmsg(struct lwp * l,const struct linux_sys_recvmsg_args * uap,register_t * retval)771 linux_sys_recvmsg(struct lwp *l, const struct linux_sys_recvmsg_args *uap, register_t *retval)
772 {
773           /* {
774                     syscallarg(int) s;
775                     syscallarg(struct linux_msghdr *) msg;
776                     syscallarg(u_int) flags;
777           } */
778           struct msghdr       msg;
779           struct linux_msghdr lmsg;
780           int                 error;
781           struct mbuf         *from, *control;
782 
783           error = copyin(SCARG(uap, msg), &lmsg, sizeof(lmsg));
784           if (error)
785                     return (error);
786           linux_to_bsd_msghdr(&lmsg, &msg);
787 
788           msg.msg_flags = linux_to_bsd_msg_flags(SCARG(uap, flags));
789           if (msg.msg_flags < 0) {
790                     /* Some unsupported flag */
791                     return (EINVAL);
792           }
793           msg.msg_flags |= MSG_IOVUSRSPACE;
794 
795           error = do_sys_recvmsg(l, SCARG(uap, s), &msg, &from,
796               msg.msg_control != NULL ? &control : NULL, retval);
797           if (error != 0)
798                     return error;
799 
800           if (msg.msg_control != NULL)
801                     error = linux_copyout_msg_control(l, &msg, control);
802 
803           if (error == 0 && from != 0) {
804                     mtod(from, struct osockaddr *)->sa_family =
805                         bsd_to_linux_domain(mtod(from, struct sockaddr *)->sa_family);
806                     error = copyout_sockname(msg.msg_name, &msg.msg_namelen, 0,
807                               from);
808           } else
809                     msg.msg_namelen = 0;
810 
811           if (from != NULL)
812                     m_free(from);
813 
814           if (error == 0) {
815                     msg.msg_flags = bsd_to_linux_msg_flags(msg.msg_flags);
816                     if (msg.msg_flags < 0)
817                               /* Some flag unsupported by Linux */
818                               error = EINVAL;
819                     else {
820                               ktrkuser("msghdr", &msg, sizeof(msg));
821                               bsd_to_linux_msghdr(&msg, &lmsg);
822                               error = copyout(&lmsg, SCARG(uap, msg), sizeof(lmsg));
823                     }
824           }
825 
826           return (error);
827 }
828 
829 /*
830  * Convert socket option level from Linux to NetBSD value. Only SOL_SOCKET
831  * is different, the rest matches IPPROTO_* on both systems.
832  */
833 int
linux_to_bsd_sopt_level(int llevel)834 linux_to_bsd_sopt_level(int llevel)
835 {
836 
837           switch (llevel) {
838           case LINUX_SOL_SOCKET:
839                     return SOL_SOCKET;
840           case LINUX_SOL_IP:
841                     return IPPROTO_IP;
842 #ifdef INET6
843           case LINUX_SOL_IPV6:
844                     return IPPROTO_IPV6;
845 #endif
846           case LINUX_SOL_TCP:
847                     return IPPROTO_TCP;
848           case LINUX_SOL_UDP:
849                     return IPPROTO_UDP;
850           default:
851                     return -1;
852           }
853 }
854 
855 /*
856  * Convert Linux socket level socket option numbers to NetBSD values.
857  */
858 int
linux_to_bsd_so_sockopt(int lopt)859 linux_to_bsd_so_sockopt(int lopt)
860 {
861 
862           switch (lopt) {
863           case LINUX_SO_DEBUG:
864                     return SO_DEBUG;
865           case LINUX_SO_REUSEADDR:
866                     /*
867                      * Linux does not implement SO_REUSEPORT, but allows reuse of
868                      * a host:port pair through SO_REUSEADDR even if the address
869                      * is not a multicast-address. Effectively, this means that we
870                      * should use SO_REUSEPORT to allow Linux applications to not
871                      * exit with EADDRINUSE
872                      */
873                     return SO_REUSEPORT;
874           case LINUX_SO_TYPE:
875                     return SO_TYPE;
876           case LINUX_SO_ERROR:
877                     return SO_ERROR;
878           case LINUX_SO_DONTROUTE:
879                     return SO_DONTROUTE;
880           case LINUX_SO_BROADCAST:
881                     return SO_BROADCAST;
882           case LINUX_SO_SNDBUF:
883                     return SO_SNDBUF;
884           case LINUX_SO_RCVBUF:
885                     return SO_RCVBUF;
886           case LINUX_SO_KEEPALIVE:
887                     return SO_KEEPALIVE;
888           case LINUX_SO_OOBINLINE:
889                     return SO_OOBINLINE;
890           case LINUX_SO_NO_CHECK:
891           case LINUX_SO_PRIORITY:
892                     return -1;
893           case LINUX_SO_LINGER:
894                     return SO_LINGER;
895           case LINUX_SO_BSDCOMPAT:
896           case LINUX_SO_PASSCRED:
897           case LINUX_SO_PEERCRED:
898                     return -1;
899           case LINUX_SO_RCVLOWAT:
900                     return SO_RCVLOWAT;
901           case LINUX_SO_SNDLOWAT:
902                     return SO_SNDLOWAT;
903           case LINUX_SO_RCVTIMEO:
904                     return SO_RCVTIMEO;
905           case LINUX_SO_SNDTIMEO:
906                     return SO_SNDTIMEO;
907           case LINUX_SO_SECURITY_AUTHENTICATION:
908           case LINUX_SO_SECURITY_ENCRYPTION_TRANSPORT:
909           case LINUX_SO_SECURITY_ENCRYPTION_NETWORK:
910           case LINUX_SO_BINDTODEVICE:
911           case LINUX_SO_ATTACH_FILTER:
912           case LINUX_SO_DETACH_FILTER:
913           case LINUX_SO_PEERNAME:
914                     return -1;
915           case LINUX_SO_TIMESTAMP:
916                     return SO_TIMESTAMP;
917           case LINUX_SO_ACCEPTCONN:
918           case LINUX_SO_PEERSEC:
919           case LINUX_SO_SNDBUFFORCE:
920           case LINUX_SO_RCVBUFFORCE:
921           case LINUX_SO_PASSSEC:
922           case LINUX_SO_TIMESTAMPNS:
923           case LINUX_SO_MARK:
924           case LINUX_SO_TIMESTAMPING:
925           case LINUX_SO_PROTOCOL:
926           case LINUX_SO_DOMAIN:
927           case LINUX_SO_RXQ_OVFL:
928           case LINUX_SO_WIFI_STATUS:
929           case LINUX_SO_PEEK_OFF:
930           case LINUX_SO_NOFCS:
931           default:
932                     return -1;
933           }
934 }
935 
936 /*
937  * Convert Linux IP level socket option number to NetBSD values.
938  */
939 int
linux_to_bsd_ip_sockopt(int lopt)940 linux_to_bsd_ip_sockopt(int lopt)
941 {
942 
943           switch (lopt) {
944           case LINUX_IP_TOS:
945                     return IP_TOS;
946           case LINUX_IP_TTL:
947                     return IP_TTL;
948           case LINUX_IP_HDRINCL:
949                     return IP_HDRINCL;
950           case LINUX_IP_MULTICAST_TTL:
951                     return IP_MULTICAST_TTL;
952           case LINUX_IP_MULTICAST_LOOP:
953                     return IP_MULTICAST_LOOP;
954           case LINUX_IP_MULTICAST_IF:
955                     return IP_MULTICAST_IF;
956           case LINUX_IP_ADD_MEMBERSHIP:
957                     return IP_ADD_MEMBERSHIP;
958           case LINUX_IP_DROP_MEMBERSHIP:
959                     return IP_DROP_MEMBERSHIP;
960           case LINUX_IP_RECVERR:
961                     return -2;          /* ignored */
962           default:
963                     return -1;
964           }
965 }
966 
/*
 * Translate a Linux IPv6-level socket option number into the NetBSD
 * one.  Only LINUX_IPV6_V6ONLY is translated; everything else yields -1.
 * Compiled only into INET6 kernels.
 */
#ifdef INET6
int
linux_to_bsd_ipv6_sockopt(int lopt)
{

	if (lopt == LINUX_IPV6_V6ONLY)
		return IPV6_V6ONLY;

	return -1;
}
#endif
983 
984 /*
985  * Convert Linux TCP level socket option number to NetBSD values.
986  */
987 int
linux_to_bsd_tcp_sockopt(int lopt)988 linux_to_bsd_tcp_sockopt(int lopt)
989 {
990 
991           switch (lopt) {
992           case LINUX_TCP_NODELAY:
993                     return TCP_NODELAY;
994           case LINUX_TCP_MAXSEG:
995                     return TCP_MAXSEG;
996           default:
997                     return -1;
998           }
999 }
1000 
/*
 * Translate a Linux UDP-level socket option number into the NetBSD one.
 * No UDP-level option is translated, so the result is always -1.
 */
int
linux_to_bsd_udp_sockopt(int lopt)
{

	(void)lopt;
	return -1;
}
1013 
1014 /*
1015  * Another reasonably straightforward function: setsockopt(2).
1016  * The level and option numbers are converted; the values passed
1017  * are not (yet) converted, the ones currently implemented don't
1018  * need conversion, as they are the same on both systems.
1019  */
1020 int
linux_sys_setsockopt(struct lwp * l,const struct linux_sys_setsockopt_args * uap,register_t * retval)1021 linux_sys_setsockopt(struct lwp *l, const struct linux_sys_setsockopt_args *uap, register_t *retval)
1022 {
1023           /* {
1024                     syscallarg(int) s;
1025                     syscallarg(int) level;
1026                     syscallarg(int) optname;
1027                     syscallarg(void *) optval;
1028                     syscallarg(int) optlen;
1029           } */
1030           struct sys_setsockopt_args bsa;
1031           int name;
1032 
1033           SCARG(&bsa, s) = SCARG(uap, s);
1034           SCARG(&bsa, level) = linux_to_bsd_sopt_level(SCARG(uap, level));
1035           SCARG(&bsa, val) = SCARG(uap, optval);
1036           SCARG(&bsa, valsize) = SCARG(uap, optlen);
1037 
1038           /*
1039            * Linux supports only SOL_SOCKET for AF_LOCAL domain sockets
1040            * and returns EOPNOTSUPP for other levels
1041            */
1042           if (SCARG(&bsa, level) != SOL_SOCKET) {
1043                     struct socket *so;
1044                     int error, family;
1045 
1046                     /* fd_getsock() will use the descriptor for us */
1047                     if ((error = fd_getsock(SCARG(&bsa, s), &so)) != 0)
1048                               return error;
1049                     family = so->so_proto->pr_domain->dom_family;
1050                     fd_putfile(SCARG(&bsa, s));
1051 
1052                     if (family == AF_LOCAL)
1053                               return EOPNOTSUPP;
1054           }
1055 
1056           switch (SCARG(&bsa, level)) {
1057           case SOL_SOCKET:
1058                     name = linux_to_bsd_so_sockopt(SCARG(uap, optname));
1059                     break;
1060           case IPPROTO_IP:
1061                     name = linux_to_bsd_ip_sockopt(SCARG(uap, optname));
1062                     break;
1063 #ifdef INET6
1064           case IPPROTO_IPV6:
1065                     name = linux_to_bsd_ipv6_sockopt(SCARG(uap, optname));
1066                     break;
1067 #endif
1068           case IPPROTO_TCP:
1069                     name = linux_to_bsd_tcp_sockopt(SCARG(uap, optname));
1070                     break;
1071           case IPPROTO_UDP:
1072                     name = linux_to_bsd_udp_sockopt(SCARG(uap, optname));
1073                     break;
1074           default:
1075                     return EINVAL;
1076           }
1077 
1078           if (name == -1)
1079                     return EINVAL;
1080           if (name == -2)
1081                     return 0;
1082           SCARG(&bsa, name) = name;
1083 
1084           return sys_setsockopt(l, &bsa, retval);
1085 }
1086 
1087 /*
1088  * getsockopt(2) is very much the same as setsockopt(2) (see above)
1089  */
1090 int
linux_sys_getsockopt(struct lwp * l,const struct linux_sys_getsockopt_args * uap,register_t * retval)1091 linux_sys_getsockopt(struct lwp *l, const struct linux_sys_getsockopt_args *uap, register_t *retval)
1092 {
1093           /* {
1094                     syscallarg(int) s;
1095                     syscallarg(int) level;
1096                     syscallarg(int) optname;
1097                     syscallarg(void *) optval;
1098                     syscallarg(int *) optlen;
1099           } */
1100           struct sys_getsockopt_args bga;
1101           int name;
1102 
1103           SCARG(&bga, s) = SCARG(uap, s);
1104           SCARG(&bga, level) = linux_to_bsd_sopt_level(SCARG(uap, level));
1105           SCARG(&bga, val) = SCARG(uap, optval);
1106           SCARG(&bga, avalsize) = (socklen_t *)SCARG(uap, optlen);
1107 
1108           switch (SCARG(&bga, level)) {
1109           case SOL_SOCKET:
1110                     name = linux_to_bsd_so_sockopt(SCARG(uap, optname));
1111                     break;
1112           case IPPROTO_IP:
1113                     name = linux_to_bsd_ip_sockopt(SCARG(uap, optname));
1114                     break;
1115 #ifdef INET6
1116           case IPPROTO_IPV6:
1117                     name = linux_to_bsd_ipv6_sockopt(SCARG(uap, optname));
1118                     break;
1119 #endif
1120           case IPPROTO_TCP:
1121                     name = linux_to_bsd_tcp_sockopt(SCARG(uap, optname));
1122                     break;
1123           case IPPROTO_UDP:
1124                     name = linux_to_bsd_udp_sockopt(SCARG(uap, optname));
1125                     break;
1126           default:
1127                     return EINVAL;
1128           }
1129 
1130           if (name == -1)
1131                     return EINVAL;
1132           SCARG(&bga, name) = name;
1133 
1134           return sys_getsockopt(l, &bga, retval);
1135 }
1136 
1137 int
linux_getifname(struct lwp * l,register_t * retval,void * data)1138 linux_getifname(struct lwp *l, register_t *retval, void *data)
1139 {
1140           struct ifnet *ifp;
1141           struct linux_ifreq ifr;
1142           int error;
1143           int s;
1144 
1145           error = copyin(data, &ifr, sizeof(ifr));
1146           if (error)
1147                     return error;
1148 
1149           s = pserialize_read_enter();
1150           ifp = if_byindex(ifr.ifr_ifru.ifru_ifindex);
1151           if (ifp == NULL) {
1152                     pserialize_read_exit(s);
1153                     return ENODEV;
1154           }
1155 
1156           strncpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name));
1157           pserialize_read_exit(s);
1158 
1159           return copyout(&ifr, data, sizeof(ifr));
1160 }
1161 
1162 int
linux_getifconf(struct lwp * l,register_t * retval,void * data)1163 linux_getifconf(struct lwp *l, register_t *retval, void *data)
1164 {
1165           struct linux_ifreq ifr, *ifrp = NULL;
1166           struct linux_ifconf ifc;
1167           struct ifnet *ifp;
1168           struct sockaddr *sa;
1169           struct osockaddr *osa;
1170           int space = 0, error;
1171           const int sz = (int)sizeof(ifr);
1172           bool docopy;
1173           int s;
1174           int bound;
1175           struct psref psref;
1176 
1177           error = copyin(data, &ifc, sizeof(ifc));
1178           if (error)
1179                     return error;
1180 
1181           docopy = ifc.ifc_req != NULL;
1182           if (docopy) {
1183                     if (ifc.ifc_len < 0)
1184                               return EINVAL;
1185 
1186                     space = ifc.ifc_len;
1187                     ifrp = ifc.ifc_req;
1188           }
1189           memset(&ifr, 0, sizeof(ifr));
1190 
1191           bound = curlwp_bind();
1192           s = pserialize_read_enter();
1193           IFNET_READER_FOREACH(ifp) {
1194                     struct ifaddr *ifa;
1195                     if_acquire(ifp, &psref);
1196                     pserialize_read_exit(s);
1197 
1198                     (void)strncpy(ifr.ifr_name, ifp->if_xname,
1199                         sizeof(ifr.ifr_name));
1200                     if (ifr.ifr_name[sizeof(ifr.ifr_name) - 1] != '\0') {
1201                               error = ENAMETOOLONG;
1202                               goto release_exit;
1203                     }
1204 
1205                     s = pserialize_read_enter();
1206                     IFADDR_READER_FOREACH(ifa, ifp) {
1207                               struct psref psref_ifa;
1208                               ifa_acquire(ifa, &psref_ifa);
1209                               pserialize_read_exit(s);
1210 
1211                               sa = ifa->ifa_addr;
1212                               if (sa->sa_family != AF_INET ||
1213                                   sa->sa_len > sizeof(*osa))
1214                                         goto next;
1215                               memcpy(&ifr.ifr_addr, sa, sa->sa_len);
1216                               osa = (struct osockaddr *)&ifr.ifr_addr;
1217                               osa->sa_family = sa->sa_family;
1218                               if (space >= sz) {
1219                                         error = copyout(&ifr, ifrp, sz);
1220                                         if (error != 0) {
1221                                                   ifa_release(ifa, &psref_ifa);
1222                                                   goto release_exit;
1223                                         }
1224                                         ifrp++;
1225                               }
1226                               space -= sz;
1227                     next:
1228                               s = pserialize_read_enter();
1229                               ifa_release(ifa, &psref_ifa);
1230                     }
1231 
1232                     KASSERT(pserialize_in_read_section());
1233                     if_release(ifp, &psref);
1234           }
1235           pserialize_read_exit(s);
1236           curlwp_bindx(bound);
1237 
1238           if (docopy)
1239                     ifc.ifc_len -= space;
1240           else
1241                     ifc.ifc_len = -space;
1242 
1243           return copyout(&ifc, data, sizeof(ifc));
1244 
1245 release_exit:
1246           if_release(ifp, &psref);
1247           curlwp_bindx(bound);
1248           return error;
1249 }
1250 
1251 int
linux_getifhwaddr(struct lwp * l,register_t * retval,u_int fd,void * data)1252 linux_getifhwaddr(struct lwp *l, register_t *retval, u_int fd,
1253     void *data)
1254 {
1255           /* Not the full structure, just enough to map what we do here */
1256           struct linux_ifreq lreq;
1257           file_t *fp;
1258           struct ifaddr *ifa;
1259           struct ifnet *ifp;
1260           struct sockaddr_dl *sadl;
1261           int error, found;
1262           int index, ifnum;
1263           int s;
1264 
1265           /*
1266            * We can't emulate this ioctl by calling sys_ioctl() to run
1267            * SIOCGIFCONF, because the user buffer is not of the right
1268            * type to take those results.  We can't use kernel buffers to
1269            * receive the results, as the implementation of sys_ioctl()
1270            * and ifconf() [which implements SIOCGIFCONF] use
1271            * copyin()/copyout() which will fail on kernel addresses.
1272            *
1273            * So, we must duplicate code from sys_ioctl() and ifconf().  Ugh.
1274            */
1275 
1276           if ((fp = fd_getfile(fd)) == NULL)
1277                     return (EBADF);
1278 
1279           KERNEL_LOCK(1, NULL);
1280 
1281           if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
1282                     error = EBADF;
1283                     goto out;
1284           }
1285 
1286           error = copyin(data, &lreq, sizeof(lreq));
1287           if (error)
1288                     goto out;
1289           lreq.ifr_name[LINUX_IFNAMSIZ-1] = '\0';           /* just in case */
1290 
1291           /*
1292            * Try real interface name first, then fake "ethX"
1293            */
1294           found = 0;
1295           s = pserialize_read_enter();
1296           IFNET_READER_FOREACH(ifp) {
1297                     if (found)
1298                               break;
1299                     if (strcmp(lreq.ifr_name, ifp->if_xname))
1300                               /* not this interface */
1301                               continue;
1302 
1303                     found=1;
1304                     if (IFADDR_READER_EMPTY(ifp)) {
1305                               pserialize_read_exit(s);
1306                               error = ENODEV;
1307                               goto out;
1308                     }
1309                     IFADDR_READER_FOREACH(ifa, ifp) {
1310                               sadl = satosdl(ifa->ifa_addr);
1311                               /* only return ethernet addresses */
1312                               /* XXX what about FDDI, etc. ? */
1313                               if (sadl->sdl_family != AF_LINK ||
1314                                   sadl->sdl_type != IFT_ETHER)
1315                                         continue;
1316                               memcpy(&lreq.ifr_hwaddr.sa_data, CLLADDR(sadl),
1317                                      MIN(sadl->sdl_alen,
1318                                            sizeof(lreq.ifr_hwaddr.sa_data)));
1319                               lreq.ifr_hwaddr.sa_family =
1320                                         sadl->sdl_family;
1321                               pserialize_read_exit(s);
1322 
1323                               error = copyout(&lreq, data, sizeof(lreq));
1324                               goto out;
1325                     }
1326           }
1327           pserialize_read_exit(s);
1328 
1329           if (strncmp(lreq.ifr_name, "eth", 3) != 0) {
1330                     /* unknown interface, not even an "eth*" name */
1331                     error = ENODEV;
1332                     goto out;
1333           }
1334 
1335           for (ifnum = 0, index = 3;
1336                index < LINUX_IFNAMSIZ && lreq.ifr_name[index] != '\0';
1337                index++) {
1338                     ifnum *= 10;
1339                     ifnum += lreq.ifr_name[index] - '0';
1340           }
1341 
1342           error = EINVAL;                         /* in case we don't find one */
1343           s = pserialize_read_enter();
1344           IFNET_READER_FOREACH(ifp) {
1345                     memcpy(lreq.ifr_name, ifp->if_xname,
1346                            MIN(LINUX_IFNAMSIZ, IFNAMSIZ));
1347                     IFADDR_READER_FOREACH(ifa, ifp) {
1348                               sadl = satosdl(ifa->ifa_addr);
1349                               /* only return ethernet addresses */
1350                               /* XXX what about FDDI, etc. ? */
1351                               if (sadl->sdl_family != AF_LINK ||
1352                                   sadl->sdl_type != IFT_ETHER)
1353                                         continue;
1354                               if (ifnum--)
1355                                         /* not the requested iface */
1356                                         continue;
1357                               memcpy(&lreq.ifr_hwaddr.sa_data,
1358                                      CLLADDR(sadl),
1359                                      MIN(sadl->sdl_alen,
1360                                            sizeof(lreq.ifr_hwaddr.sa_data)));
1361                               lreq.ifr_hwaddr.sa_family =
1362                                         sadl->sdl_family;
1363                               pserialize_read_exit(s);
1364 
1365                               error = copyout(&lreq, data, sizeof(lreq));
1366                               goto out;
1367                     }
1368           }
1369           pserialize_read_exit(s);
1370 
1371 out:
1372           KERNEL_UNLOCK_ONE(NULL);
1373           fd_putfile(fd);
1374           return error;
1375 }
1376 
1377 int
linux_ioctl_socket(struct lwp * l,const struct linux_sys_ioctl_args * uap,register_t * retval)1378 linux_ioctl_socket(struct lwp *l, const struct linux_sys_ioctl_args *uap, register_t *retval)
1379 {
1380           /* {
1381                     syscallarg(int) fd;
1382                     syscallarg(u_long) com;
1383                     syscallarg(void *) data;
1384           } */
1385           u_long com;
1386           int error = 0, isdev = 0, dosys = 1;
1387           struct sys_ioctl_args ia;
1388           file_t *fp;
1389           struct vnode *vp;
1390           int (*ioctlf)(file_t *, u_long, void *);
1391           struct ioctl_pt pt;
1392 
1393           if ((fp = fd_getfile(SCARG(uap, fd))) == NULL)
1394                     return (EBADF);
1395 
1396           if (fp->f_type == DTYPE_VNODE) {
1397                     vp = (struct vnode *)fp->f_data;
1398                     isdev = vp->v_type == VCHR;
1399           }
1400 
1401           /*
1402            * Don't try to interpret socket ioctl calls that are done
1403            * on a device filedescriptor, just pass them through, to
1404            * emulate Linux behaviour. Use PTIOCLINUX so that the
1405            * device will only handle these if it's prepared to do
1406            * so, to avoid unexpected things from happening.
1407            */
1408           if (isdev) {
1409                     dosys = 0;
1410                     ioctlf = fp->f_ops->fo_ioctl;
1411                     pt.com = SCARG(uap, com);
1412                     pt.data = SCARG(uap, data);
1413                     error = ioctlf(fp, PTIOCLINUX, &pt);
1414                     /*
1415                      * XXX hack: if the function returns EJUSTRETURN,
1416                      * it has stuffed a sysctl return value in pt.data.
1417                      */
1418                     if (error == EJUSTRETURN) {
1419                               retval[0] = (register_t)pt.data;
1420                               error = 0;
1421                     }
1422                     goto out;
1423           }
1424 
1425           com = SCARG(uap, com);
1426           retval[0] = 0;
1427 
1428           switch (com) {
1429           case LINUX_SIOCGIFNAME:
1430                     error = linux_getifname(l, retval, SCARG(uap, data));
1431                     dosys = 0;
1432                     break;
1433           case LINUX_SIOCGIFCONF:
1434                     error = linux_getifconf(l, retval, SCARG(uap, data));
1435                     dosys = 0;
1436                     break;
1437           case LINUX_SIOCGIFFLAGS:
1438                     SCARG(&ia, com) = OSIOCGIFFLAGS;
1439                     break;
1440           case LINUX_SIOCSIFFLAGS:
1441                     SCARG(&ia, com) = OSIOCSIFFLAGS;
1442                     break;
1443           case LINUX_SIOCGIFADDR:
1444                     SCARG(&ia, com) = OOSIOCGIFADDR;
1445                     break;
1446           case LINUX_SIOCGIFDSTADDR:
1447                     SCARG(&ia, com) = OOSIOCGIFDSTADDR;
1448                     break;
1449           case LINUX_SIOCGIFBRDADDR:
1450                     SCARG(&ia, com) = OOSIOCGIFBRDADDR;
1451                     break;
1452           case LINUX_SIOCGIFNETMASK:
1453                     SCARG(&ia, com) = OOSIOCGIFNETMASK;
1454                     break;
1455           case LINUX_SIOCGIFMTU:
1456                     SCARG(&ia, com) = OSIOCGIFMTU;
1457                     break;
1458           case LINUX_SIOCADDMULTI:
1459                     SCARG(&ia, com) = OSIOCADDMULTI;
1460                     break;
1461           case LINUX_SIOCDELMULTI:
1462                     SCARG(&ia, com) = OSIOCDELMULTI;
1463                     break;
1464           case LINUX_SIOCGIFHWADDR:
1465                     error = linux_getifhwaddr(l, retval, SCARG(uap, fd),
1466                         SCARG(uap, data));
1467                     dosys = 0;
1468                     break;
1469           default:
1470                     error = EINVAL;
1471           }
1472 
1473  out:
1474           fd_putfile(SCARG(uap, fd));
1475 
1476           if (error ==0 && dosys) {
1477                     SCARG(&ia, fd) = SCARG(uap, fd);
1478                     SCARG(&ia, data) = SCARG(uap, data);
1479                     error = sys_ioctl(curlwp, &ia, retval);
1480           }
1481 
1482           return error;
1483 }
1484 
1485 int
linux_sys_connect(struct lwp * l,const struct linux_sys_connect_args * uap,register_t * retval)1486 linux_sys_connect(struct lwp *l, const struct linux_sys_connect_args *uap, register_t *retval)
1487 {
1488           /* {
1489                     syscallarg(int) s;
1490                     syscallarg(const struct sockaddr *) name;
1491                     syscallarg(int) namelen;
1492           } */
1493           int                 error;
1494           struct sockaddr_big sb;
1495 
1496           error = linux_get_sa(l, SCARG(uap, s), &sb, SCARG(uap, name),
1497               SCARG(uap, namelen));
1498           if (error)
1499                     return (error);
1500 
1501           error = do_sys_connect(l, SCARG(uap, s), (struct sockaddr *)&sb);
1502 
1503           if (error == EISCONN) {
1504                     struct socket *so;
1505                     int state, prflags;
1506 
1507                     /* fd_getsock() will use the descriptor for us */
1508                     if (fd_getsock(SCARG(uap, s), &so) != 0)
1509                               return EISCONN;
1510 
1511                     solock(so);
1512                     state = so->so_state;
1513                     prflags = so->so_proto->pr_flags;
1514                     sounlock(so);
1515                     fd_putfile(SCARG(uap, s));
1516                     /*
1517                      * We should only let this call succeed once per
1518                      * non-blocking connect; however we don't have
1519                      * a convenient place to keep that state..
1520                      */
1521                     if ((state & (SS_ISCONNECTED|SS_NBIO)) ==
1522                         (SS_ISCONNECTED|SS_NBIO) &&
1523                         (prflags & PR_CONNREQUIRED))
1524                               return 0;
1525           }
1526 
1527           return (error);
1528 }
1529 
1530 int
linux_sys_bind(struct lwp * l,const struct linux_sys_bind_args * uap,register_t * retval)1531 linux_sys_bind(struct lwp *l, const struct linux_sys_bind_args *uap, register_t *retval)
1532 {
1533           /* {
1534                     syscallarg(int) s;
1535                     syscallarg(const struct osockaddr *) name;
1536                     syscallarg(int) namelen;
1537           } */
1538           int                 error;
1539           struct sockaddr_big sb;
1540 
1541           error = linux_get_sa(l, SCARG(uap, s), &sb, SCARG(uap, name),
1542               SCARG(uap, namelen));
1543           if (error)
1544                     return (error);
1545 
1546           return do_sys_bind(l, SCARG(uap, s), (struct sockaddr *)&sb);
1547 }
1548 
1549 int
linux_sys_getsockname(struct lwp * l,const struct linux_sys_getsockname_args * uap,register_t * retval)1550 linux_sys_getsockname(struct lwp *l, const struct linux_sys_getsockname_args *uap, register_t *retval)
1551 {
1552           /* {
1553                     syscallarg(int) fdes;
1554                     syscallarg(void *) asa;
1555                     syscallarg(int *) alen;
1556           } */
1557           int error;
1558 
1559           if ((error = sys_getsockname(l, (const void *)uap, retval)) != 0)
1560                     return (error);
1561 
1562           if ((error = linux_sa_put((struct osockaddr *)SCARG(uap, asa))))
1563                     return (error);
1564 
1565           return (0);
1566 }
1567 
1568 int
linux_sys_getpeername(struct lwp * l,const struct linux_sys_getpeername_args * uap,register_t * retval)1569 linux_sys_getpeername(struct lwp *l, const struct linux_sys_getpeername_args *uap, register_t *retval)
1570 {
1571           /* {
1572                     syscallarg(int) fdes;
1573                     syscallarg(void *) asa;
1574                     syscallarg(int *) alen;
1575           } */
1576           int error;
1577 
1578           if ((error = sys_getpeername(l, (const void *)uap, retval)) != 0)
1579                     return (error);
1580 
1581           if ((error = linux_sa_put((struct osockaddr *)SCARG(uap, asa))))
1582                     return (error);
1583 
1584           return (0);
1585 }
1586 
1587 /*
1588  * Copy the osockaddr structure pointed to by name to sb, adjust
1589  * family and convert to sockaddr.
1590  */
1591 static int
linux_get_sa(struct lwp * l,int s,struct sockaddr_big * sb,const struct osockaddr * name,socklen_t namelen)1592 linux_get_sa(struct lwp *l, int s, struct sockaddr_big *sb,
1593     const struct osockaddr *name, socklen_t namelen)
1594 {
1595           int error, bdom;
1596 
1597           if (namelen > UCHAR_MAX ||
1598               namelen <= offsetof(struct sockaddr_big, sb_data))
1599                     return EINVAL;
1600 
1601           error = copyin(name, sb, namelen);
1602           if (error)
1603                     return error;
1604 
1605           bdom = linux_to_bsd_domain(sb->sb_family);
1606           if (bdom == -1)
1607                     return EINVAL;
1608 
1609           /*
1610            * If the family is unspecified, use address family of the socket.
1611            * This avoid triggering strict family checks in netinet/in_pcb.c et.al.
1612            */
1613           if (bdom == AF_UNSPEC) {
1614                     struct socket *so;
1615 
1616                     /* fd_getsock() will use the descriptor for us */
1617                     if ((error = fd_getsock(s, &so)) != 0)
1618                               return error;
1619 
1620                     bdom = so->so_proto->pr_domain->dom_family;
1621                     fd_putfile(s);
1622           }
1623 
1624           /*
1625            * Older Linux IPv6 code uses obsolete RFC2133 struct sockaddr_in6,
1626            * which lacks the scope id compared with RFC2553 one. If we detect
1627            * the situation, reject the address and write a message to system log.
1628            *
1629            * Still accept addresses for which the scope id is not used.
1630            */
1631           if (bdom == AF_INET6 &&
1632               namelen == sizeof(struct sockaddr_in6) - sizeof(uint32_t)) {
1633                     struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sb;
1634                     if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) &&
1635                         (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) ||
1636                          IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) ||
1637                          IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr) ||
1638                          IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
1639                          IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))) {
1640                               struct proc *p = l->l_proc;
1641                               int uid = l->l_cred ? kauth_cred_geteuid(l->l_cred) : -1;
1642 
1643                               log(LOG_DEBUG,
1644                                   "pid %d (%s), uid %d: obsolete pre-RFC2553 "
1645                                   "sockaddr_in6 rejected",
1646                                   p->p_pid, p->p_comm, uid);
1647                               return EINVAL;
1648                     }
1649                     namelen = sizeof(struct sockaddr_in6);
1650                     sin6->sin6_scope_id = 0;
1651           }
1652 
1653           /*
1654            * Linux is less strict than NetBSD and permits namelen to be larger
1655            * than valid struct sockaddr_in*.  If this is the case, truncate
1656            * the value to the correct size, so that NetBSD networking does not
1657            * return an error.
1658            */
1659           switch (bdom) {
1660           case AF_INET:
1661                     namelen = MIN(namelen, sizeof(struct sockaddr_in));
1662                     break;
1663           case AF_INET6:
1664                     namelen = MIN(namelen, sizeof(struct sockaddr_in6));
1665                     break;
1666           }
1667 
1668           sb->sb_family = bdom;
1669           sb->sb_len = namelen;
1670           ktrkuser("mbsoname", sb, namelen);
1671           return 0;
1672 }
1673 
1674 static int
linux_sa_put(struct osockaddr * osa)1675 linux_sa_put(struct osockaddr *osa)
1676 {
1677           struct sockaddr sa;
1678           struct osockaddr *kosa;
1679           int error, bdom, len;
1680 
1681           /*
1682            * Only read/write the sockaddr family and length part, the rest is
1683            * not changed.
1684            */
1685           len = sizeof(sa.sa_len) + sizeof(sa.sa_family);
1686 
1687           error = copyin(osa, &sa, len);
1688           if (error)
1689                     return (error);
1690 
1691           bdom = bsd_to_linux_domain(sa.sa_family);
1692           if (bdom == -1)
1693                     return (EINVAL);
1694 
1695           /* Note: we convert from sockaddr to osockaddr here, too */
1696           kosa = (struct osockaddr *) &sa;
1697           kosa->sa_family = bdom;
1698           error = copyout(kosa, osa, len);
1699           if (error)
1700                     return (error);
1701 
1702           return (0);
1703 }
1704 
1705 #if !defined(__aarch64__) && !defined(__amd64__)
1706 int
linux_sys_recv(struct lwp * l,const struct linux_sys_recv_args * uap,register_t * retval)1707 linux_sys_recv(struct lwp *l, const struct linux_sys_recv_args *uap, register_t *retval)
1708 {
1709           /* {
1710                     syscallarg(int) s;
1711                     syscallarg(void *) buf;
1712                     syscallarg(int) len;
1713                     syscallarg(int) flags;
1714           } */
1715           struct sys_recvfrom_args bra;
1716 
1717 
1718           SCARG(&bra, s) = SCARG(uap, s);
1719           SCARG(&bra, buf) = SCARG(uap, buf);
1720           SCARG(&bra, len) = (size_t) SCARG(uap, len);
1721           SCARG(&bra, flags) = SCARG(uap, flags);
1722           SCARG(&bra, from) = NULL;
1723           SCARG(&bra, fromlenaddr) = NULL;
1724 
1725           return (sys_recvfrom(l, &bra, retval));
1726 }
1727 
1728 int
linux_sys_send(struct lwp * l,const struct linux_sys_send_args * uap,register_t * retval)1729 linux_sys_send(struct lwp *l, const struct linux_sys_send_args *uap, register_t *retval)
1730 {
1731           /* {
1732                     syscallarg(int) s;
1733                     syscallarg(void *) buf;
1734                     syscallarg(int) len;
1735                     syscallarg(int) flags;
1736           } */
1737           struct sys_sendto_args bsa;
1738 
1739           SCARG(&bsa, s)                = SCARG(uap, s);
1740           SCARG(&bsa, buf)    = SCARG(uap, buf);
1741           SCARG(&bsa, len)    = SCARG(uap, len);
1742           SCARG(&bsa, flags)  = SCARG(uap, flags);
1743           SCARG(&bsa, to)               = NULL;
1744           SCARG(&bsa, tolen)  = 0;
1745 
1746           return (sys_sendto(l, &bsa, retval));
1747 }
1748 #endif
1749 
1750 int
linux_sys_accept(struct lwp * l,const struct linux_sys_accept_args * uap,register_t * retval)1751 linux_sys_accept(struct lwp *l, const struct linux_sys_accept_args *uap, register_t *retval)
1752 {
1753           /* {
1754                     syscallarg(int) s;
1755                     syscallarg(struct osockaddr *) name;
1756                     syscallarg(int *) anamelen;
1757           } */
1758           int error;
1759           struct sys_accept_args baa;
1760 
1761           SCARG(&baa, s)                = SCARG(uap, s);
1762           SCARG(&baa, name)   = (struct sockaddr *) SCARG(uap, name);
1763           SCARG(&baa, anamelen)         = (unsigned int *) SCARG(uap, anamelen);
1764 
1765           if ((error = sys_accept(l, &baa, retval)))
1766                     return (error);
1767 
1768           if (SCARG(uap, name) && (error = linux_sa_put(SCARG(uap, name))))
1769                     return (error);
1770 
1771           return (0);
1772 }
1773 
1774 int
linux_sys_accept4(struct lwp * l,const struct linux_sys_accept4_args * uap,register_t * retval)1775 linux_sys_accept4(struct lwp *l, const struct linux_sys_accept4_args *uap, register_t *retval)
1776 {
1777           /* {
1778                     syscallarg(int) s;
1779                     syscallarg(struct osockaddr *) name;
1780                     syscallarg(int *) anamelen;
1781                     syscallarg(int) flags;
1782           } */
1783           int error, flags;
1784           struct sockaddr_big name;
1785 
1786           if ((flags = linux_to_bsd_type(SCARG(uap, flags))) == -1)
1787                     return EINVAL;
1788 
1789           name.sb_len = UCHAR_MAX;
1790           error = do_sys_accept(l, SCARG(uap, s), (struct sockaddr *)&name,
1791               retval, NULL, flags, 0);
1792           if (error != 0)
1793                     return error;
1794 
1795           error = copyout_sockname_sb((struct sockaddr *)SCARG(uap, name),
1796               SCARG(uap, anamelen), MSG_LENUSRSPACE, &name);
1797           if (error != 0) {
1798                     int fd = (int)*retval;
1799                     if (fd_getfile(fd) != NULL)
1800                               (void)fd_close(fd);
1801                     return error;
1802           }
1803           if (SCARG(uap, name) && (error = linux_sa_put(SCARG(uap, name))))
1804                     return error;
1805 
1806           return 0;
1807 }
1808 
1809 int
linux_sys_sendmmsg(struct lwp * l,const struct linux_sys_sendmmsg_args * uap,register_t * retval)1810 linux_sys_sendmmsg(struct lwp *l, const struct linux_sys_sendmmsg_args *uap,
1811     register_t *retval)
1812 {
1813           /* {
1814                     syscallarg(int) s;
1815                     syscallarg(struct linux_mmsghdr *) msgvec;
1816                     syscallarg(unsigned int) vlen;
1817                     syscallarg(unsigned int) flags;
1818           } */
1819           struct linux_mmsghdr lmsg;
1820           struct mmsghdr bmsg;
1821           struct socket *so;
1822           file_t *fp;
1823           struct msghdr *msg = &bmsg.msg_hdr;
1824           int error, s;
1825           unsigned int vlen, flags, dg;
1826 
1827           if ((flags = linux_to_bsd_msg_flags(SCARG(uap, flags))) == -1)
1828                     return EINVAL;
1829 
1830           flags = (flags & MSG_USERFLAGS) | MSG_IOVUSRSPACE;
1831 
1832           s = SCARG(uap, s);
1833           if ((error = fd_getsock1(s, &so, &fp)) != 0)
1834                     return error;
1835 
1836           vlen = SCARG(uap, vlen);
1837           if (vlen > 1024)
1838                     vlen = 1024;
1839 
1840           for (dg = 0; dg < vlen;) {
1841                     error = copyin(SCARG(uap, msgvec) + dg, &lmsg, sizeof(lmsg));
1842                     if (error)
1843                               break;
1844                     linux_to_bsd_msghdr(&lmsg.msg_hdr, &bmsg.msg_hdr);
1845 
1846                     msg->msg_flags = flags;
1847 
1848                     error = do_sys_sendmsg_so(l, s, so, fp, msg, flags, retval);
1849                     if (error)
1850                               break;
1851 
1852                     ktrkuser("msghdr", msg, sizeof *msg);
1853                     lmsg.msg_len = *retval;
1854                     error = copyout(&lmsg, SCARG(uap, msgvec) + dg, sizeof(lmsg));
1855                     if (error)
1856                               break;
1857                     dg++;
1858 
1859           }
1860 
1861           *retval = dg;
1862 
1863           fd_putfile(s);
1864 
1865           /*
1866            * If we succeeded at least once, return 0.
1867            */
1868           if (dg)
1869                     return 0;
1870           return error;
1871 }
1872 
1873 int
linux_sys_recvmmsg(struct lwp * l,const struct linux_sys_recvmmsg_args * uap,register_t * retval)1874 linux_sys_recvmmsg(struct lwp *l, const struct linux_sys_recvmmsg_args *uap,
1875     register_t *retval)
1876 {
1877           /* {
1878                     syscallarg(int) s;
1879                     syscallarg(struct linux_mmsghdr *) msgvec;
1880                     syscallarg(unsigned int) vlen;
1881                     syscallarg(unsigned int) flags;
1882                     syscallarg(struct linux_timespec *) timeout;
1883           } */
1884           struct linux_mmsghdr lmsg;
1885           struct mmsghdr bmsg;
1886           struct socket *so;
1887           struct msghdr *msg = &bmsg.msg_hdr;
1888           int error, s;
1889           struct mbuf *from, *control;
1890           struct timespec ts = {0}, now;
1891           struct linux_timespec lts;
1892           unsigned int vlen, flags, dg;
1893 
1894           if (SCARG(uap, timeout)) {
1895                     error = copyin(SCARG(uap, timeout), &lts, sizeof(lts));
1896                               return error;
1897                     ts.tv_sec = lts.tv_sec;
1898                     ts.tv_nsec = lts.tv_nsec;
1899                     getnanotime(&now);
1900                     timespecadd(&now, &ts, &ts);
1901           }
1902 
1903           s = SCARG(uap, s);
1904           if ((error = fd_getsock(s, &so)) != 0)
1905                     return error;
1906 
1907           /*
1908            * If so->so_rerror holds a deferred error return it now.
1909            */
1910           if (so->so_rerror) {
1911                     error = so->so_rerror;
1912                     so->so_rerror = 0;
1913                     fd_putfile(s);
1914                     return error;
1915           }
1916 
1917           vlen = SCARG(uap, vlen);
1918           if (vlen > 1024)
1919                     vlen = 1024;
1920 
1921           from = NULL;
1922           flags = (SCARG(uap, flags) & MSG_USERFLAGS) | MSG_IOVUSRSPACE;
1923 
1924           for (dg = 0; dg < vlen;) {
1925                     error = copyin(SCARG(uap, msgvec) + dg, &lmsg, sizeof(lmsg));
1926                     if (error)
1927                               break;
1928                     linux_to_bsd_msghdr(&lmsg.msg_hdr, &bmsg.msg_hdr);
1929                     msg->msg_flags = flags & ~MSG_WAITFORONE;
1930 
1931                     if (from != NULL) {
1932                               m_free(from);
1933                               from = NULL;
1934                     }
1935 
1936                     error = do_sys_recvmsg_so(l, s, so, msg, &from,
1937                         msg->msg_control != NULL ? &control : NULL, retval);
1938                     if (error) {
1939                               if (error == EAGAIN && dg > 0)
1940                                         error = 0;
1941                               break;
1942                     }
1943 
1944                     if (msg->msg_control != NULL)
1945                               error = linux_copyout_msg_control(l, msg, control);
1946                     if (error)
1947                               break;
1948 
1949                     if (from != NULL) {
1950                               mtod(from, struct osockaddr *)->sa_family =
1951                                   bsd_to_linux_domain(mtod(from,
1952                                   struct sockaddr *)->sa_family);
1953                               error = copyout_sockname(msg->msg_name,
1954                                   &msg->msg_namelen, 0, from);
1955                               if (error)
1956                                         break;
1957                     }
1958 
1959 
1960                     lmsg.msg_len = *retval;
1961                     ktrkuser("msghdr", msg, sizeof(*msg));
1962                     bsd_to_linux_msghdr(msg, &lmsg.msg_hdr);
1963                     error = copyout(&lmsg, SCARG(uap, msgvec) + dg, sizeof(lmsg));
1964                     if (error)
1965                               break;
1966 
1967                     dg++;
1968                     if (msg->msg_flags & MSG_OOB)
1969                               break;
1970 
1971                     if (SCARG(uap, timeout)) {
1972                               getnanotime(&now);
1973                               timespecsub(&now, &ts, &now);
1974                               if (now.tv_sec > 0)
1975                                         break;
1976                     }
1977 
1978                     if (flags & MSG_WAITFORONE)
1979                               flags |= MSG_DONTWAIT;
1980 
1981           }
1982 
1983           if (from != NULL)
1984                     m_free(from);
1985 
1986           *retval = dg;
1987 
1988           /*
1989            * If we succeeded at least once, return 0, hopefully so->so_rerror
1990            * will catch it next time.
1991            */
1992           if (error && dg > 0) {
1993                     so->so_rerror = error;
1994                     error = 0;
1995           }
1996 
1997           fd_putfile(s);
1998 
1999           return error;
2000 }
2001