1 /*      $NetBSD: rumpclient.c,v 1.71 2023/07/31 04:37:04 rin Exp $    */
2 
3 /*
4  * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 /*
29  * Client side routines for rump syscall proxy.
30  */
31 
32 #include <rump/rumpuser_port.h>
33 
34 /*
35  * We use kqueue on the BSDs, poll elsewhere.  We
36  * want to use kqueue because it will give us the ability to get signal
37  * notifications but defer their handling to a stage where we do not
38  * hold the communication lock.  Taking a signal while holding on to
39  * that lock may cause a deadlock.  Therefore, block signals throughout
40  * the RPC when using poll.  On Linux, we use signalfd in the same role
41  * as kqueue on NetBSD to be able to take signals while waiting for a
42  * response from the server.
43  */
44 
45 #if defined(__NetBSD__) || defined(__FreeBSD__) || \
46     defined(__DragonFly__) || defined(__OpenBSD__)
47 #define USE_KQUEUE
48 #endif
49 #if defined(__linux__)
50 #define USE_SIGNALFD
51 #endif
52 
53 __RCSID("$NetBSD: rumpclient.c,v 1.71 2023/07/31 04:37:04 rin Exp $");
54 
55 #include <sys/param.h>
56 #include <sys/mman.h>
57 #include <sys/socket.h>
58 #include <sys/time.h>
59 
60 #ifdef USE_KQUEUE
61 #include <sys/event.h>
62 #endif
63 
64 #include <arpa/inet.h>
65 #include <netinet/in.h>
66 #include <netinet/tcp.h>
67 
68 #include <assert.h>
69 #include <dlfcn.h>
70 #include <errno.h>
71 #include <fcntl.h>
72 #include <poll.h>
73 #include <pthread.h>
74 #include <signal.h>
75 #include <stdarg.h>
76 #include <stdbool.h>
77 #include <stdio.h>
78 #include <stdlib.h>
79 #include <string.h>
80 #include <unistd.h>
81 
82 #include <rump/rumpclient.h>
83 
/*
 * Function pointers through which this file performs its own socket,
 * descriptor and event operations (see the host_*() calls below).
 * They are only defined here; the code that assigns them is not in
 * this part of the file.
 * NOTE(review): presumably they are bound to the host libc versions so
 * that hijacked/rump variants are bypassed -- confirm at the
 * initialization site.
 */
84 #define HOSTOPS
85 int       (*host_socket)(int, int, int);
86 int       (*host_close)(int);
87 int       (*host_connect)(int, const struct sockaddr *, socklen_t);
88 int       (*host_fcntl)(int, int, ...);
89 int       (*host_poll)(struct pollfd *, nfds_t, int);
90 ssize_t   (*host_read)(int, void *, size_t);
91 ssize_t (*host_sendmsg)(int, const struct msghdr *, int);
92 int       (*host_setsockopt)(int, int, int, const void *, socklen_t);
93 int       (*host_dup)(int);
94 
/* kqueue entry points, only on platforms that defined USE_KQUEUE above */
95 #ifdef USE_KQUEUE
96 int       (*host_kqueue)(void);
/* the kevent count parameters are size_t on NetBSD and int elsewhere */
97 #ifdef __NetBSD__
98 int       (*host_kevent)(int, const struct kevent *, size_t,
99                            struct kevent *, size_t, const struct timespec *);
100 #else
101 int       (*host_kevent)(int, const struct kevent *, int,
102                            struct kevent *, int, const struct timespec *);
103 #endif
104 #endif
105 
/* signalfd entry point, only on platforms that defined USE_SIGNALFD above */
106 #ifdef USE_SIGNALFD
107 #include <sys/signalfd.h>
108 
109 int       (*host_signalfd)(int, const sigset_t *, int);
110 #endif
111 
112 int       (*host_execve)(const char *, char *const[], char *const[]);
113 
114 #include "sp_common.c"
115 #include "rumpuser_sigtrans.c"
116 
/*
 * State of the one client connection used by this file.  spc_fd is -1
 * while there is no socket; doconnect() stores the connected socket in
 * it on success.
 */
117 static struct spclient clispc = {
118           .spc_fd = -1,
119 };
120 
/*
 * holyfd: descriptor returned by makeholyfd() (a kqueue, a signalfd or
 * -1 depending on the platform); cliwaitresp() waits on it.
 * fullset: signal set installed with pthread_sigmask(SIG_SETMASK) while
 * a request is in progress; it is filled in outside this part of the
 * file.
 */
121 static int holyfd = -1;
122 static sigset_t fullset;
123 
/* defined further down, but needed by send_with_recon() */
124 static int doconnect(void);
125 static int handshake_req(struct spclient *, int, void *, int, bool);
126 
127 /*
128  * Default: don't retry.  Most clients can't handle it
129  * (consider e.g. fds suddenly going missing).
130  */
/*
 * send_with_recon() interprets the value: 0 disables reconnecting,
 * RUMPCLIENT_RETRYCONN_DIE and RUMPCLIENT_RETRYCONN_ONCE select special
 * policies, and a positive value is a limit in seconds.
 */
131 static time_t retrytimo = 0;
132 
133 /* always defined to nothingness for now */
134 #define ERRLOG(a)
135 
136 static int
send_with_recon(struct spclient * spc,struct iovec * iov,size_t iovlen)137 send_with_recon(struct spclient *spc, struct iovec *iov, size_t iovlen)
138 {
139           struct timeval starttime, curtime;
140           time_t prevreconmsg;
141           unsigned reconretries;
142           int rv;
143 
144           for (prevreconmsg = 0, reconretries = 0;;) {
145                     rv = dosend(spc, iov, iovlen);
146                     if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
147                               /* no persistent connections */
148                               if (retrytimo == 0) {
149                                         rv = ENOTCONN;
150                                         break;
151                               }
152                               if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
153                                         _exit(1);
154 
155                               if (!prevreconmsg) {
156                                         prevreconmsg = time(NULL);
157                                         gettimeofday(&starttime, NULL);
158                               }
159                               if (reconretries == 1) {
160                                         if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
161                                                   rv = ENOTCONN;
162                                                   break;
163                                         }
164                                         fprintf(stderr, "rump_sp: connection to "
165                                             "kernel lost, trying to reconnect ...\n");
166                               } else if (time(NULL) - prevreconmsg > 120) {
167                                         fprintf(stderr, "rump_sp: still trying to "
168                                             "reconnect ...\n");
169                                         prevreconmsg = time(NULL);
170                               }
171 
172                               /* check that we aren't over the limit */
173                               if (retrytimo > 0) {
174                                         time_t tdiff;
175 
176                                         gettimeofday(&curtime, NULL);
177                                         tdiff = curtime.tv_sec - starttime.tv_sec;
178                                         if (starttime.tv_usec > curtime.tv_usec)
179                                                   tdiff--;
180                                         if (tdiff >= retrytimo) {
181                                                   fprintf(stderr, "rump_sp: reconnect "
182                                                       "failed, %lld second timeout\n",
183                                                       (long long)retrytimo);
184                                                   return ENOTCONN;
185                                         }
186                               }
187 
188                               /* adhoc backoff timer */
189                               if (reconretries < 10) {
190                                         usleep(100000 * reconretries);
191                               } else {
192                                         sleep(MIN(10, reconretries-9));
193                               }
194                               reconretries++;
195 
196                               if ((rv = doconnect()) != 0)
197                                         continue;
198                               if ((rv = handshake_req(&clispc, HANDSHAKE_GUEST,
199                                   NULL, 0, true)) != 0)
200                                         continue;
201 
202                               /*
203                                * ok, reconnect successful.  we need to return to
204                                * the upper layer to get the entire PDU resent.
205                                */
206                               if (reconretries != 1)
207                                         fprintf(stderr, "rump_sp: reconnected!\n");
208                               rv = EAGAIN;
209                               break;
210                     } else {
211                               _DIAGASSERT(errno != EAGAIN);
212                               break;
213                     }
214           }
215 
216           return rv;
217 }
218 
219 static int
cliwaitresp(struct spclient * spc,struct respwait * rw,sigset_t * mask,bool keeplock)220 cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
221           bool keeplock)
222 {
223           uint64_t mygen;
224           bool imalive = true;
225 
226           pthread_mutex_lock(&spc->spc_mtx);
227           if (!keeplock)
228                     sendunlockl(spc);
229           mygen = spc->spc_generation;
230 
231           rw->rw_error = 0;
232           while (!rw->rw_done && rw->rw_error == 0) {
233                     if (__predict_false(spc->spc_generation != mygen || !imalive))
234                               break;
235 
236                     /* are we free to receive? */
237                     if (spc->spc_istatus == SPCSTATUS_FREE) {
238                               int gotresp, dosig, rv;
239 
240                               spc->spc_istatus = SPCSTATUS_BUSY;
241                               pthread_mutex_unlock(&spc->spc_mtx);
242 
243                               dosig = 0;
244                               for (gotresp = 0; !gotresp; ) {
245 #ifdef USE_KQUEUE
246                                         struct kevent kev[8];
247                                         int i;
248 
249                                         /*
250                                          * typically we don't have a frame waiting
251                                          * when we come in here, so call kevent now
252                                          */
253                                         rv = host_kevent(holyfd, NULL, 0,
254                                             kev, __arraycount(kev), NULL);
255 
256                                         if (__predict_false(rv == -1)) {
257                                                   goto activity;
258                                         }
259 
260                                         /*
261                                          * XXX: don't know how this can happen
262                                          * (timeout cannot expire since there
263                                          * isn't one), but it does happen.
264                                          * treat it as an expectional condition
265                                          * and go through tryread to determine
266                                          * alive status.
267                                          */
268                                         if (__predict_false(rv == 0))
269                                                   goto activity;
270 
271                                         for (i = 0; i < rv; i++) {
272                                                   if (kev[i].filter == EVFILT_SIGNAL)
273                                                             dosig++;
274                                         }
275                                         if (dosig)
276                                                   goto cleanup;
277 
278                                         /*
279                                          * ok, activity.  try to read a frame to
280                                          * determine what happens next.
281                                          */
282  activity:
283 #else /* !USE_KQUEUE */
284                                         struct pollfd pfd[2];
285 
286                                         pfd[0].fd = clispc.spc_fd;
287                                         pfd[0].events = POLLIN;
288                                         pfd[1].fd = holyfd;
289                                         pfd[1].events = POLLIN;
290 
291                                         rv = host_poll(pfd, 2, -1);
292                                         if (rv >= 1 && pfd[1].revents & POLLIN) {
293                                                   dosig = 1;
294                                                   goto cleanup;
295                                         }
296 #endif /* !USE_KQUEUE */
297 
298                                         switch (readframe(spc)) {
299                                         case 0:
300                                                   continue;
301                                         case -1:
302                                                   imalive = false;
303                                                   goto cleanup;
304                                         default:
305                                                   /* case 1 */
306                                                   break;
307                                         }
308 
309                                         switch (spc->spc_hdr.rsp_class) {
310                                         case RUMPSP_RESP:
311                                         case RUMPSP_ERROR:
312                                                   kickwaiter(spc);
313                                                   gotresp = spc->spc_hdr.rsp_reqno ==
314                                                       rw->rw_reqno;
315                                                   break;
316                                         case RUMPSP_REQ:
317                                                   handlereq(spc);
318                                                   break;
319                                         default:
320                                                   /* panic */
321                                                   break;
322                                         }
323                               }
324 
325  cleanup:
326                               pthread_mutex_lock(&spc->spc_mtx);
327                               if (spc->spc_istatus == SPCSTATUS_WANTED)
328                                         kickall(spc);
329                               spc->spc_istatus = SPCSTATUS_FREE;
330 
331                               /* take one for the team */
332                               if (dosig) {
333                                         pthread_mutex_unlock(&spc->spc_mtx);
334                                         pthread_sigmask(SIG_SETMASK, mask, NULL);
335                                         pthread_sigmask(SIG_SETMASK, &fullset, NULL);
336                                         pthread_mutex_lock(&spc->spc_mtx);
337                               }
338                     } else {
339                               spc->spc_istatus = SPCSTATUS_WANTED;
340                               pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
341                     }
342           }
343           TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
344           pthread_mutex_unlock(&spc->spc_mtx);
345           pthread_cond_destroy(&rw->rw_cv);
346 
347           if (spc->spc_generation != mygen || !imalive) {
348                     return ENOTCONN;
349           }
350           return rw->rw_error;
351 }
352 
353 static int
syscall_req(struct spclient * spc,sigset_t * omask,int sysnum,const void * data,size_t dlen,void ** resp)354 syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
355           const void *data, size_t dlen, void **resp)
356 {
357           struct rsp_hdr rhdr;
358           struct respwait rw;
359           struct iovec iov[2];
360           int rv;
361 
362           rhdr.rsp_len = sizeof(rhdr) + dlen;
363           rhdr.rsp_class = RUMPSP_REQ;
364           rhdr.rsp_type = RUMPSP_SYSCALL;
365           rhdr.rsp_sysnum = sysnum;
366 
367           IOVPUT(iov[0], rhdr);
368           IOVPUT_WITHSIZE(iov[1], __UNCONST(data), dlen);
369 
370           do {
371                     putwait(spc, &rw, &rhdr);
372                     if ((rv = send_with_recon(spc, iov, __arraycount(iov))) != 0) {
373                               unputwait(spc, &rw);
374                               continue;
375                     }
376 
377                     rv = cliwaitresp(spc, &rw, omask, false);
378                     if (rv == ENOTCONN)
379                               rv = EAGAIN;
380           } while (rv == EAGAIN);
381 
382           *resp = rw.rw_data;
383           return rv;
384 }
385 
386 static int
handshake_req(struct spclient * spc,int type,void * data,int cancel,bool haslock)387 handshake_req(struct spclient *spc, int type, void *data,
388           int cancel, bool haslock)
389 {
390           struct handshake_fork rf;
391           const char *myprogname = NULL; /* XXXgcc */
392           struct rsp_hdr rhdr;
393           struct respwait rw;
394           sigset_t omask;
395           size_t bonus;
396           struct iovec iov[2];
397           int rv;
398 
399           if (type == HANDSHAKE_FORK) {
400                     bonus = sizeof(rf);
401           } else {
402 #ifdef __NetBSD__
403                     /* would procfs work on NetBSD too? */
404                     myprogname = getprogname();
405 #else
406                     int fd = open("/proc/self/comm", O_RDONLY);
407                     if (fd == -1) {
408                               myprogname = "???";
409                     } else {
410                               static char commname[128];
411 
412                               memset(commname, 0, sizeof(commname));
413                               if (read(fd, commname, sizeof(commname)) > 0) {
414                                         char *n;
415 
416                                         n = strrchr(commname, '\n');
417                                         if (n)
418                                                   *n = '\0';
419                                         myprogname = commname;
420                               } else {
421                                         myprogname = "???";
422                               }
423                               close(fd);
424                     }
425 #endif
426                     bonus = strlen(myprogname)+1;
427           }
428 
429           /* performs server handshake */
430           rhdr.rsp_len = sizeof(rhdr) + bonus;
431           rhdr.rsp_class = RUMPSP_REQ;
432           rhdr.rsp_type = RUMPSP_HANDSHAKE;
433           rhdr.rsp_handshake = type;
434 
435           IOVPUT(iov[0], rhdr);
436 
437           pthread_sigmask(SIG_SETMASK, &fullset, &omask);
438           if (haslock)
439                     putwait_locked(spc, &rw, &rhdr);
440           else
441                     putwait(spc, &rw, &rhdr);
442           if (type == HANDSHAKE_FORK) {
443                     memcpy(rf.rf_auth, data, sizeof(rf.rf_auth)); /* uh, why? */
444                     rf.rf_cancel = cancel;
445                     IOVPUT(iov[1], rf);
446           } else {
447                     IOVPUT_WITHSIZE(iov[1], __UNCONST(myprogname), bonus);
448           }
449           rv = send_with_recon(spc, iov, __arraycount(iov));
450           if (rv || cancel) {
451                     if (haslock)
452                               unputwait_locked(spc, &rw);
453                     else
454                               unputwait(spc, &rw);
455                     if (cancel) {
456                               goto out;
457                     }
458           } else {
459                     rv = cliwaitresp(spc, &rw, &omask, haslock);
460           }
461           if (rv)
462                     goto out;
463 
464           rv = *(int *)rw.rw_data;
465           free(rw.rw_data);
466 
467  out:
468           pthread_sigmask(SIG_SETMASK, &omask, NULL);
469           return rv;
470 }
471 
472 static int
prefork_req(struct spclient * spc,sigset_t * omask,void ** resp)473 prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
474 {
475           struct rsp_hdr rhdr;
476           struct respwait rw;
477           struct iovec iov[1];
478           int rv;
479 
480           rhdr.rsp_len = sizeof(rhdr);
481           rhdr.rsp_class = RUMPSP_REQ;
482           rhdr.rsp_type = RUMPSP_PREFORK;
483           rhdr.rsp_error = 0;
484 
485           IOVPUT(iov[0], rhdr);
486 
487           do {
488                     putwait(spc, &rw, &rhdr);
489                     rv = send_with_recon(spc, iov, __arraycount(iov));
490                     if (rv != 0) {
491                               unputwait(spc, &rw);
492                               continue;
493                     }
494 
495                     rv = cliwaitresp(spc, &rw, omask, false);
496                     if (rv == ENOTCONN)
497                               rv = EAGAIN;
498           } while (rv == EAGAIN);
499 
500           *resp = rw.rw_data;
501           return rv;
502 }
503 
504 /*
505  * prevent response code from deadlocking with reconnect code
506  */
507 static int
resp_sendlock(struct spclient * spc)508 resp_sendlock(struct spclient *spc)
509 {
510           int rv = 0;
511 
512           pthread_mutex_lock(&spc->spc_mtx);
513           while (spc->spc_ostatus != SPCSTATUS_FREE) {
514                     if (__predict_false(spc->spc_reconnecting)) {
515                               rv = EBUSY;
516                               goto out;
517                     }
518                     spc->spc_ostatus = SPCSTATUS_WANTED;
519                     pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
520           }
521           spc->spc_ostatus = SPCSTATUS_BUSY;
522 
523  out:
524           pthread_mutex_unlock(&spc->spc_mtx);
525           return rv;
526 }
527 
528 static void
send_copyin_resp(struct spclient * spc,uint64_t reqno,void * data,size_t dlen,int wantstr)529 send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
530           int wantstr)
531 {
532           struct rsp_hdr rhdr;
533           struct iovec iov[2];
534 
535           if (wantstr)
536                     dlen = MIN(dlen, strlen(data)+1);
537 
538           rhdr.rsp_len = sizeof(rhdr) + dlen;
539           rhdr.rsp_reqno = reqno;
540           rhdr.rsp_class = RUMPSP_RESP;
541           rhdr.rsp_type = RUMPSP_COPYIN;
542           rhdr.rsp_sysnum = 0;
543 
544           IOVPUT(iov[0], rhdr);
545           IOVPUT_WITHSIZE(iov[1], data, dlen);
546 
547           if (resp_sendlock(spc) != 0)
548                     return;
549           (void)SENDIOV(spc, iov);
550           sendunlock(spc);
551 }
552 
553 static void
send_anonmmap_resp(struct spclient * spc,uint64_t reqno,void * addr)554 send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
555 {
556           struct rsp_hdr rhdr;
557           struct iovec iov[2];
558 
559           rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
560           rhdr.rsp_reqno = reqno;
561           rhdr.rsp_class = RUMPSP_RESP;
562           rhdr.rsp_type = RUMPSP_ANONMMAP;
563           rhdr.rsp_sysnum = 0;
564 
565           IOVPUT(iov[0], rhdr);
566           IOVPUT(iov[1], addr);
567 
568           if (resp_sendlock(spc) != 0)
569                     return;
570           (void)SENDIOV(spc, iov);
571           sendunlock(spc);
572 }
573 
574 int
rumpclient_syscall(int sysnum,const void * data,size_t dlen,register_t * retval)575 rumpclient_syscall(int sysnum, const void *data, size_t dlen,
576           register_t *retval)
577 {
578           struct rsp_sysresp *resp;
579           sigset_t omask;
580           void *rdata;
581           int rv;
582 
583           pthread_sigmask(SIG_SETMASK, &fullset, &omask);
584 
585           DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
586               sysnum, data, dlen));
587 
588           rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
589           if (rv)
590                     goto out;
591 
592           resp = rdata;
593           DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %"
594               PRIxREGISTER"/%"PRIxREGISTER"\n",
595               sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));
596 
597           memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
598           rv = resp->rsys_error;
599           free(rdata);
600 
601  out:
602           pthread_sigmask(SIG_SETMASK, &omask, NULL);
603           return rv;
604 }
605 
606 static void
handlereq(struct spclient * spc)607 handlereq(struct spclient *spc)
608 {
609           struct rsp_copydata *copydata;
610           struct rsp_hdr *rhdr = &spc->spc_hdr;
611           void *mapaddr;
612           size_t maplen;
613           int reqtype = spc->spc_hdr.rsp_type;
614           int sig;
615 
616           switch (reqtype) {
617           case RUMPSP_COPYIN:
618           case RUMPSP_COPYINSTR:
619                     /*LINTED*/
620                     copydata = (struct rsp_copydata *)spc->spc_buf;
621                     DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
622                         copydata->rcp_addr, copydata->rcp_len));
623                     send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
624                         copydata->rcp_addr, copydata->rcp_len,
625                         reqtype == RUMPSP_COPYINSTR);
626                     break;
627           case RUMPSP_COPYOUT:
628           case RUMPSP_COPYOUTSTR:
629                     /*LINTED*/
630                     copydata = (struct rsp_copydata *)spc->spc_buf;
631                     DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
632                         copydata->rcp_addr, copydata->rcp_len));
633                     /*LINTED*/
634                     memcpy(copydata->rcp_addr, copydata->rcp_data,
635                         copydata->rcp_len);
636                     break;
637           case RUMPSP_ANONMMAP:
638                     /*LINTED*/
639                     maplen = *(size_t *)spc->spc_buf;
640                     mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
641                         MAP_ANON|MAP_PRIVATE, -1, 0);
642                     if (mapaddr == MAP_FAILED)
643                               mapaddr = NULL;
644                     DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
645                     send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
646                     break;
647           case RUMPSP_RAISE:
648                     sig = rumpuser__sig_rump2host(rhdr->rsp_signo);
649                     DPRINTF(("rump_sp handlereq: raise sig %d\n", sig));
650                     raise(sig);
651                     /*
652                      * We most likely have signals blocked, but the signal
653                      * will be handled soon enough when we return.
654                      */
655                     break;
656           default:
657                     printf("PANIC: INVALID TYPE %d\n", reqtype);
658                     abort();
659                     break;
660           }
661 
662           spcfreebuf(spc);
663 }
664 
/*
 * ptab_idx: index of the parsetab[] entry used by doconnect() (socket
 * domain, address length and connect hook).
 * serv_sa: server address passed to host_connect() by doconnect().
 * Both are set outside this part of the file.
 */
665 static unsigned ptab_idx;
666 static struct sockaddr *serv_sa;
667 
/*
 * Duplicate myfd until the result is a "good" descriptor, i.e. one
 * above 2 that cannot collide with stdio.
 *
 * myfd       - descriptor to start from; -1 is passed straight through
 * mustchange - if nonzero, at least one dup is performed and the
 *              descriptor passed in is left open
 *
 * Every intermediate descriptor is closed again.  Returns the final
 * descriptor, or -1 if host_dup() failed; errno from the failing dup
 * is preserved across the closes.
 */
static int
dupgood(int myfd, int mustchange)
{
	int saved[4];
	unsigned int nsaved = 0;
	int dup_errno;

	while (myfd != -1 && (mustchange || myfd <= 2)) {
		assert(nsaved < __arraycount(saved));
		saved[nsaved] = myfd;
		myfd = host_dup(myfd);
		if (mustchange) {
			/* do not record (and later close) the caller's fd */
			mustchange = 0;
		} else {
			nsaved++;
		}
	}

	dup_errno = (myfd == -1 && nsaved > 0) ? errno : 0;

	/* close the rejected descriptors, newest first */
	for (; nsaved > 0; nsaved--)
		host_close(saved[nsaved - 1]);

	if (dup_errno)
		errno = dup_errno;

	return myfd;
}
699 
700 #if defined(USE_KQUEUE)
701 
702 static int
makeholyfd(void)703 makeholyfd(void)
704 {
705           struct kevent kev[NSIG+1];
706           int i, fd;
707 
708           /* setup kqueue, we want all signals and the fd */
709           if ((fd = dupgood(host_kqueue(), 0)) == -1) {
710                     ERRLOG(("rump_sp: cannot setup kqueue"));
711                     return -1;
712           }
713 
714           for (i = 0; i < NSIG; i++) {
715                     EV_SET(&kev[i], i+1, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);
716           }
717           EV_SET(&kev[NSIG], clispc.spc_fd,
718               EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
719           if (host_kevent(fd, kev, NSIG+1, NULL, 0, NULL) == -1) {
720                     ERRLOG(("rump_sp: kevent() failed"));
721                     host_close(fd);
722                     return -1;
723           }
724 
725           return fd;
726 }
727 
728 #elif defined(USE_SIGNALFD) /* !USE_KQUEUE */
729 
730 static int
makeholyfd(void)731 makeholyfd(void)
732 {
733 
734           return host_signalfd(-1, &fullset, 0);
735 }
736 
737 #else /* !USE_KQUEUE && !USE_SIGNALFD */
738 
/*
 * Fallback flavour for platforms with neither kqueue nor signalfd:
 * there is no descriptor to wait on for signals, so always report -1.
 */
static int
makeholyfd(void)
{
	const int nofd = -1;

	return nofd;
}
745 
746 #endif
747 
748 static int
doconnect(void)749 doconnect(void)
750 {
751           struct respwait rw;
752           struct rsp_hdr rhdr;
753           char banner[MAXBANNER];
754           int s, error, flags;
755           ssize_t n;
756 
757           if (holyfd != -1)
758                     host_close(holyfd);
759           holyfd = -1;
760           s = -1;
761 
762           if (clispc.spc_fd != -1)
763                     host_close(clispc.spc_fd);
764           clispc.spc_fd = -1;
765 
766           /*
767            * for reconnect, gate everyone out of the receiver code
768            */
769           putwait_locked(&clispc, &rw, &rhdr);
770 
771           pthread_mutex_lock(&clispc.spc_mtx);
772           clispc.spc_reconnecting = 1;
773           pthread_cond_broadcast(&clispc.spc_cv);
774           clispc.spc_generation++;
775           while (clispc.spc_istatus != SPCSTATUS_FREE) {
776                     clispc.spc_istatus = SPCSTATUS_WANTED;
777                     pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
778           }
779           kickall(&clispc);
780 
781           /*
782            * we can release it already since we hold the
783            * send lock during reconnect
784            * XXX: assert it
785            */
786           clispc.spc_istatus = SPCSTATUS_FREE;
787           pthread_mutex_unlock(&clispc.spc_mtx);
788           unputwait_locked(&clispc, &rw);
789 
790           free(clispc.spc_buf);
791           clispc.spc_off = 0;
792 
793           s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
794           if (s == -1)
795                     return -1;
796 
797           while (host_connect(s, serv_sa, parsetab[ptab_idx].slen) == -1) {
798                     if (errno == EINTR)
799                               continue;
800                     ERRLOG(("rump_sp: client connect failed: %s\n",
801                         strerror(errno)));
802                     return -1;
803           }
804 
805           if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
806                     ERRLOG(("rump_sp: connect hook failed\n"));
807                     return -1;
808           }
809 
810           if ((n = host_read(s, banner, sizeof(banner)-1)) <= 0) {
811                     ERRLOG(("rump_sp: failed to read banner\n"));
812                     return -1;
813           }
814 
815           if (banner[n-1] != '\n') {
816                     ERRLOG(("rump_sp: invalid banner\n"));
817                     return -1;
818           }
819           banner[n] = '\0';
820           /* XXX parse the banner some day */
821 
822           flags = host_fcntl(s, F_GETFL, 0);
823           if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
824                     ERRLOG(("rump_sp: socket fd NONBLOCK: %s\n", strerror(errno)));
825                     return -1;
826           }
827           clispc.spc_fd = s;
828           clispc.spc_state = SPCSTATE_RUNNING;
829           clispc.spc_reconnecting = 0;
830           holyfd = makeholyfd();
831 
832           return 0;
833 }
834 
835 static int
doinit(void)836 doinit(void)
837 {
838 
839           TAILQ_INIT(&clispc.spc_respwait);
840           pthread_mutex_init(&clispc.spc_mtx, NULL);
841           pthread_cond_init(&clispc.spc_cv, NULL);
842 
843           return 0;
844 }
845 
#ifdef RTLD_NEXT
/*
 * Default symbol resolver: a plain pass-through to dlsym().
 *
 * rumphijack_dlsym is declared as a weak alias of this function, so
 * unless something else provides its own rumphijack_dlsym the two
 * names refer to the same code.
 */
void *rumpclient__dlsym(void *, const char *);
void *
rumpclient__dlsym(void *handle, const char *symbol)
{
	void *addr;

	addr = dlsym(handle, symbol);
	return addr;
}
void *rumphijack_dlsym(void *, const char *)
    __attribute__((__weak__, alias("rumpclient__dlsym")));
#endif
857 
858 static pid_t init_done = 0;
859 
860 int
rumpclient_init(void)861 rumpclient_init(void)
862 {
863           char *p;
864           int error;
865           int rv = -1;
866           int hstype;
867           pid_t mypid;
868 
869           /*
870            * Make sure we're not riding the context of a previous
871            * host fork.  Note: it's *possible* that after n>1 forks
872            * we have the same pid as one of our exited parents, but
873            * I'm pretty sure there are 0 practical implications, since
874            * it means generations would have to skip rumpclient init.
875            */
876           if (init_done == (mypid = getpid()))
877                     return 0;
878 
879 #ifdef USE_KQUEUE
880           /* kq does not traverse fork() */
881           holyfd = -1;
882 #endif
883           init_done = mypid;
884 
885           sigfillset(&fullset);
886 
887           /*
888            * sag mir, wo die symbols sind.  zogen fort, der krieg beginnt.
889            * wann wird man je verstehen?  wann wird man je verstehen?
890            */
891 #ifdef RTLD_NEXT
892 #define FINDSYM2(_name_,_syscall_)                                              \
893           if ((host_##_name_ = rumphijack_dlsym(RTLD_NEXT,            \
894               #_syscall_)) == NULL) {                                           \
895                     if (rumphijack_dlsym == rumpclient__dlsym)                  \
896                               host_##_name_ = _name_; /* static fallback */     \
897                     if (host_##_name_ == NULL) {                                \
898                               fprintf(stderr,"cannot find %s: %s", #_syscall_,\
899                                   dlerror());                                             \
900                               exit(1);                                          \
901                     }                                                                     \
902           }
903 #else
904 #define FINDSYM2(_name_,_syscall)                                               \
905           host_##_name_ = _name_;
906 #endif
907 #define FINDSYM(_name_) FINDSYM2(_name_,_name_)
908 #ifdef __NetBSD__
909           FINDSYM2(socket,__socket30)
910 #else
911           FINDSYM(socket)
912 #endif
913 
914           FINDSYM(close)
915           FINDSYM(connect)
916           FINDSYM(fcntl)
917           FINDSYM(poll)
918           FINDSYM(read)
919           FINDSYM(sendmsg)
920           FINDSYM(setsockopt)
921           FINDSYM(dup)
922           FINDSYM(execve)
923 
924 #ifdef USE_KQUEUE
925           FINDSYM(kqueue)
926 #ifdef __NetBSD__
927 #if !__NetBSD_Prereq__(5,99,7)
928           FINDSYM(kevent)
929 #elif !__NetBSD_Prereq__(10,99,7)
930           FINDSYM2(kevent,_sys___kevent50)
931 #else
932           FINDSYM2(kevent,_sys___kevent100)
933 #endif
934 #else
935           FINDSYM(kevent)
936 #endif
937 #endif /* USE_KQUEUE */
938 
939 #ifdef USE_SIGNALFD
940           FINDSYM(signalfd)
941 #endif
942 
943 #undef    FINDSYM
944 #undef    FINDSY2
945 
946           if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
947                     if ((p = getenv("RUMP_SERVER")) == NULL) {
948                               fprintf(stderr, "error: RUMP_SERVER not set\n");
949                               errno = ENOENT;
950                               goto out;
951                     }
952           }
953 
954           if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
955                     errno = error;
956                     goto out;
957           }
958 
959           if (doinit() == -1)
960                     goto out;
961 
962           if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
963                     sscanf(p, "%d,%d", &clispc.spc_fd, &holyfd);
964                     unsetenv("RUMPCLIENT__EXECFD");
965                     hstype = HANDSHAKE_EXEC;
966           } else {
967                     if (doconnect() == -1)
968                               goto out;
969                     hstype = HANDSHAKE_GUEST;
970           }
971 
972           error = handshake_req(&clispc, hstype, NULL, 0, false);
973           if (error) {
974                     pthread_mutex_destroy(&clispc.spc_mtx);
975                     pthread_cond_destroy(&clispc.spc_cv);
976                     if (clispc.spc_fd != -1)
977                               host_close(clispc.spc_fd);
978                     errno = error;
979                     goto out;
980           }
981           rv = 0;
982 
983  out:
984           if (rv == -1)
985                     init_done = 0;
986           return rv;
987 }
988 
989 struct rumpclient_fork {
990           uint32_t fork_auth[AUTHLEN];
991           struct spclient fork_spc;
992           int fork_holyfd;
993 };
994 
995 struct rumpclient_fork *
rumpclient_prefork(void)996 rumpclient_prefork(void)
997 {
998           struct rumpclient_fork *rpf;
999           sigset_t omask;
1000           void *resp;
1001           int rv;
1002 
1003           pthread_sigmask(SIG_SETMASK, &fullset, &omask);
1004           rpf = malloc(sizeof(*rpf));
1005           if (rpf == NULL)
1006                     goto out;
1007 
1008           if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
1009                     free(rpf);
1010                     errno = rv;
1011                     rpf = NULL;
1012                     goto out;
1013           }
1014 
1015           memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
1016           free(resp);
1017 
1018           rpf->fork_spc = clispc;
1019           rpf->fork_holyfd = holyfd;
1020 
1021  out:
1022           pthread_sigmask(SIG_SETMASK, &omask, NULL);
1023           return rpf;
1024 }
1025 
1026 int
rumpclient_fork_init(struct rumpclient_fork * rpf)1027 rumpclient_fork_init(struct rumpclient_fork *rpf)
1028 {
1029           int error;
1030           int osock;
1031 
1032           osock = clispc.spc_fd;
1033           memset(&clispc, 0, sizeof(clispc));
1034           clispc.spc_fd = osock;
1035 
1036 #ifdef USE_KQUEUE
1037           holyfd = -1; /* kqueue descriptor is not copied over fork() */
1038 #else
1039           if (holyfd != -1) {
1040                     host_close(holyfd);
1041                     holyfd = -1;
1042           }
1043 #endif
1044 
1045           if (doinit() == -1)
1046                     return -1;
1047           if (doconnect() == -1)
1048                     return -1;
1049 
1050           error = handshake_req(&clispc, HANDSHAKE_FORK, rpf->fork_auth,
1051               0, false);
1052           if (error) {
1053                     pthread_mutex_destroy(&clispc.spc_mtx);
1054                     pthread_cond_destroy(&clispc.spc_cv);
1055                     errno = error;
1056                     return -1;
1057           }
1058 
1059           return 0;
1060 }
1061 
/*
 * Cancel a fork prepared with rumpclient_prefork().
 * Not implemented: the argument is accepted and ignored.
 */
/*ARGSUSED*/
void
rumpclient_fork_cancel(struct rumpclient_fork *rpf)
{

	(void)rpf;	/* EUNIMPL */
}
1069 
1070 void
rumpclient_fork_vparent(struct rumpclient_fork * rpf)1071 rumpclient_fork_vparent(struct rumpclient_fork *rpf)
1072 {
1073 
1074           clispc = rpf->fork_spc;
1075           holyfd = rpf->fork_holyfd;
1076 }
1077 
1078 void
rumpclient_setconnretry(time_t timeout)1079 rumpclient_setconnretry(time_t timeout)
1080 {
1081 
1082           if (timeout < RUMPCLIENT_RETRYCONN_DIE)
1083                     return; /* gigo */
1084 
1085           retrytimo = timeout;
1086 }
1087 
1088 int
rumpclient__closenotify(int * fdp,enum rumpclient_closevariant variant)1089 rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
1090 {
1091           int fd = *fdp;
1092           int untilfd;
1093           int newfd;
1094 
1095           switch (variant) {
1096           case RUMPCLIENT_CLOSE_FCLOSEM:
1097                     untilfd = MAX(clispc.spc_fd, holyfd);
1098                     for (; fd <= untilfd; fd++) {
1099                               if (fd == clispc.spc_fd || fd == holyfd)
1100                                         continue;
1101                               (void)host_close(fd);
1102                     }
1103                     *fdp = fd;
1104                     break;
1105 
1106           case RUMPCLIENT_CLOSE_CLOSE:
1107           case RUMPCLIENT_CLOSE_DUP2:
1108                     if (fd == clispc.spc_fd) {
1109                               newfd = dupgood(clispc.spc_fd, 1);
1110                               if (newfd == -1)
1111                                         return -1;
1112 
1113 #ifdef USE_KQUEUE
1114                               {
1115                               struct kevent kev[2];
1116 
1117                               /*
1118                                * now, we have a new socket number, so change
1119                                * the file descriptor that kqueue is
1120                                * monitoring.  remove old and add new.
1121                                */
1122                               EV_SET(&kev[0], clispc.spc_fd,
1123                                   EVFILT_READ, EV_DELETE, 0, 0, 0);
1124                               EV_SET(&kev[1], newfd,
1125                                   EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
1126                               if (host_kevent(holyfd, kev, 2, NULL, 0, NULL) == -1) {
1127                                         int sverrno = errno;
1128                                         host_close(newfd);
1129                                         errno = sverrno;
1130                                         return -1;
1131                               }}
1132 #endif /* !USE_KQUEUE */
1133                               clispc.spc_fd = newfd;
1134                     }
1135                     if (holyfd != -1 && fd == holyfd) {
1136                               newfd = dupgood(holyfd, 1);
1137                               if (newfd == -1)
1138                                         return -1;
1139                               holyfd = newfd;
1140                     }
1141                     break;
1142           }
1143 
1144           return 0;
1145 }
1146 
/*
 * fork() for rump clients: hands the host fork() to
 * rumpclient__dofork() and returns whatever that returns.
 */
pid_t
rumpclient_fork(void)
{
	pid_t result;

	result = rumpclient__dofork(fork);
	return result;
}
1153 
1154 /*
1155  * Process is about to exec.  Save info about our existing connection
1156  * in the env.  rumpclient will check for this info in init().
1157  * This is mostly for the benefit of rumphijack, but regular applications
1158  * may use it as well.
1159  */
1160 int
rumpclient_exec(const char * path,char * const argv[],char * const envp[])1161 rumpclient_exec(const char *path, char *const argv[], char *const envp[])
1162 {
1163           char buf[4096];
1164           char **newenv;
1165           char *envstr, *envstr2;
1166           size_t nelem;
1167           int rv, sverrno;
1168 
1169           snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
1170               clispc.spc_fd, holyfd);
1171           envstr = malloc(strlen(buf)+1);
1172           if (envstr == NULL) {
1173                     return ENOMEM;
1174           }
1175           strcpy(envstr, buf);
1176 
1177           /* do we have a fully parsed url we want to forward in the env? */
1178           if (*parsedurl != '\0') {
1179                     snprintf(buf, sizeof(buf),
1180                         "RUMP__PARSEDSERVER=%s", parsedurl);
1181                     envstr2 = malloc(strlen(buf)+1);
1182                     if (envstr2 == NULL) {
1183                               free(envstr);
1184                               return ENOMEM;
1185                     }
1186                     strcpy(envstr2, buf);
1187           } else {
1188                     envstr2 = NULL;
1189           }
1190 
1191           for (nelem = 0; envp && envp[nelem]; nelem++)
1192                     continue;
1193 
1194           newenv = malloc(sizeof(*newenv) * (nelem+3));
1195           if (newenv == NULL) {
1196                     free(envstr2);
1197                     free(envstr);
1198                     return ENOMEM;
1199           }
1200           memcpy(&newenv[0], envp, nelem*sizeof(*envp));
1201 
1202           newenv[nelem] = envstr;
1203           newenv[nelem+1] = envstr2;
1204           newenv[nelem+2] = NULL;
1205 
1206           rv = host_execve(path, argv, newenv);
1207 
1208           _DIAGASSERT(rv != 0);
1209           sverrno = errno;
1210           free(envstr2);
1211           free(envstr);
1212           free(newenv);
1213           errno = sverrno;
1214           return rv;
1215 }
1216 
/*
 * daemon() is handwritten for the benefit of platforms which
 * do not support daemon().
 *
 * Prepares a rump fork, forks (the parent exits with status 0),
 * starts a new session in the child, optionally changes directory to
 * "/" (unless nochdir) and redirects descriptors 0-2 to /dev/null
 * (unless noclose), then completes the rump side of the fork.
 *
 * Returns 0 in the child on success, -1 on failure.
 */
int
rumpclient_daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf;
	int sverrno;
	int rv;

	if ((rf = rumpclient_prefork()) == NULL)
		return -1;

	switch (fork()) {
	case 0:
		break;
	case -1:
		goto daemonerr;
	default:
		_exit(0);
	}

	if (setsid() == -1)
		goto daemonerr;
	if (!nochdir && chdir("/") == -1)
		goto daemonerr;
	if (!noclose) {
		int fd = open("/dev/null", O_RDWR);

		/* best effort: leave stdio alone if /dev/null is missing */
		if (fd != -1) {
			dup2(fd, 0);
			dup2(fd, 1);
			dup2(fd, 2);
			if (fd > 2)
				close(fd);
		}
	}

	/* note: fork is either completed or cancelled by the call */
	rv = rumpclient_fork_init(rf);

	/*
	 * rumpclient_fork_init() only reads the context, so it can be
	 * released here regardless of the outcome.
	 */
	sverrno = errno;
	free(rf);
	errno = sverrno;

	if (rv == -1)
		return -1;

	return 0;

 daemonerr:
	/* rf is left to rumpclient_fork_cancel(), currently unimplemented */
	sverrno = errno;
	rumpclient_fork_cancel(rf);
	errno = sverrno;
	return -1;
}
1264