1 /*        $NetBSD: tcp-comms.c,v 1.1.1.2 2009/12/02 00:27:06 haad Exp $         */
2 
3 /*
4  *  Copyright (C) 2002-2003 Sistina Software, Inc. All rights reserved.
5  *  Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
6  *
7  * This file is part of LVM2.
8  *
9  * This copyrighted material is made available to anyone wishing to use,
10  * modify, copy, or redistribute it subject to the terms and conditions
11  * of the GNU Lesser General Public License v.2.1.
12  *
13  * You should have received a copy of the GNU Lesser General Public License
14  * along with this program; if not, write to the Free Software Foundation,
15  * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16  */
17 
/*
 * This provides the inter-clvmd communications for a system without CMAN.
 * There is a listening TCP socket which accepts new connections in the
 * normal way.
 * It can also make outgoing connections to the other clvmd nodes.
 */
24 
25 #define _GNU_SOURCE
26 #define _FILE_OFFSET_BITS 64
27 
28 #include <configure.h>
29 #include <pthread.h>
30 #include <sys/types.h>
31 #include <sys/utsname.h>
32 #include <sys/ioctl.h>
33 #include <sys/socket.h>
34 #include <sys/stat.h>
35 #include <sys/socket.h>
36 #include <netinet/in.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <stdint.h>
40 #include <fcntl.h>
41 #include <string.h>
42 #include <stddef.h>
43 #include <stdint.h>
44 #include <unistd.h>
45 #include <errno.h>
46 #include <syslog.h>
47 #include <netdb.h>
48 #include <assert.h>
49 #include <libdevmapper.h>
50 
51 #include "clvm.h"
52 #include "clvmd-comms.h"
53 #include "clvmd.h"
54 #include "clvmd-gulm.h"
55 
/* Port used when init_comms() is called with a port of 0 */
#define DEFAULT_TCP_PORT 21064

/* Listening socket; stays -1 until init_comms() has created it */
static int listen_fd = -1;
/* TCP port selected by init_comms(), kept in host byte order */
static int tcp_port;
/* Maps a node's CSID (binary key) to its struct local_client */
struct dm_hash_table *sock_hash;

static int get_our_ip_address(char *addr, int *family);
static int read_from_tcpsock(struct local_client *client, char *buf, int len, char *csid,
                             struct local_client **new_client);
65 
66 /* Called by init_cluster() to open up the listening socket */
init_comms(unsigned short port)67 int init_comms(unsigned short port)
68 {
69     struct sockaddr_in6 addr;
70 
71     sock_hash = dm_hash_create(100);
72     tcp_port = port ? : DEFAULT_TCP_PORT;
73 
74     listen_fd = socket(AF_INET6, SOCK_STREAM, 0);
75 
76     if (listen_fd < 0)
77     {
78           return -1;
79     }
80     else
81     {
82           int one = 1;
83           setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(int));
84           setsockopt(listen_fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(int));
85     }
86 
87     memset(&addr, 0, sizeof(addr)); // Bind to INADDR_ANY
88     addr.sin6_family = AF_INET6;
89     addr.sin6_port = htons(tcp_port);
90 
91     if (bind(listen_fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
92     {
93           DEBUGLOG("Can't bind to port: %s\n", strerror(errno));
94           syslog(LOG_ERR, "Can't bind to port %d, is clvmd already running ?", tcp_port);
95           close(listen_fd);
96           return -1;
97     }
98 
99     listen(listen_fd, 5);
100 
101     /* Set Close-on-exec */
102     fcntl(listen_fd, F_SETFD, 1);
103 
104     return 0;
105 }
106 
tcp_remove_client(const char * c_csid)107 void tcp_remove_client(const char *c_csid)
108 {
109     struct local_client *client;
110     char csid[GULM_MAX_CSID_LEN];
111     unsigned int i;
112     memcpy(csid, c_csid, sizeof csid);
113     DEBUGLOG("tcp_remove_client\n");
114 
115     /* Don't actually close the socket here - that's the
116        job of clvmd.c whch will do the job when it notices the
117        other end has gone. We just need to remove the client(s) from
118        the hash table so we don't try to use it for sending any more */
119     for (i = 0; i < 2; i++)
120     {
121           client = dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
122           if (client)
123           {
124               dm_hash_remove_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
125               client->removeme = 1;
126               close(client->fd);
127           }
128           /* Look for a mangled one too, on the 2nd iteration. */
129           csid[0] ^= 0x80;
130     }
131 }
132 
alloc_client(int fd,const char * c_csid,struct local_client ** new_client)133 int alloc_client(int fd, const char *c_csid, struct local_client **new_client)
134 {
135     struct local_client *client;
136     char csid[GULM_MAX_CSID_LEN];
137     memcpy(csid, c_csid, sizeof csid);
138 
139     DEBUGLOG("alloc_client %d csid = %s\n", fd, print_csid(csid));
140 
141     /* Create a local_client and return it */
142     client = malloc(sizeof(struct local_client));
143     if (!client)
144     {
145           DEBUGLOG("malloc failed\n");
146           return -1;
147     }
148 
149     memset(client, 0, sizeof(struct local_client));
150     client->fd = fd;
151     client->type = CLUSTER_DATA_SOCK;
152     client->callback = read_from_tcpsock;
153     if (new_client)
154           *new_client = client;
155 
156     /* Add to our list of node sockets */
157     if (dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN))
158     {
159           DEBUGLOG("alloc_client mangling CSID for second connection\n");
160           /* This is a duplicate connection but we can't close it because
161              the other end may already have started sending.
162              So, we mangle the IP address and keep it, all sending will
163              go out of the main FD
164           */
165           csid[0] ^= 0x80;
166           client->bits.net.flags = 1; /* indicate mangled CSID */
167 
168         /* If it still exists then kill the connection as we should only
169            ever have one incoming connection from each node */
170         if (dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN))
171         {
172               DEBUGLOG("Multiple incoming connections from node\n");
173             syslog(LOG_ERR, " Bogus incoming connection from %d.%d.%d.%d\n", csid[0],csid[1],csid[2],csid[3]);
174 
175               free(client);
176             errno = ECONNREFUSED;
177             return -1;
178         }
179     }
180     dm_hash_insert_binary(sock_hash, csid, GULM_MAX_CSID_LEN, client);
181 
182     return 0;
183 }
184 
get_main_gulm_cluster_fd()185 int get_main_gulm_cluster_fd()
186 {
187     return listen_fd;
188 }
189 
190 
191 /* Read on main comms (listen) socket, accept it */
cluster_fd_gulm_callback(struct local_client * fd,char * buf,int len,const char * csid,struct local_client ** new_client)192 int cluster_fd_gulm_callback(struct local_client *fd, char *buf, int len, const char *csid,
193                               struct local_client **new_client)
194 {
195     int newfd;
196     struct sockaddr_in6 addr;
197     socklen_t addrlen = sizeof(addr);
198     int status;
199     char name[GULM_MAX_CLUSTER_MEMBER_NAME_LEN];
200 
201     DEBUGLOG("cluster_fd_callback\n");
202     *new_client = NULL;
203     newfd = accept(listen_fd, (struct sockaddr *)&addr, &addrlen);
204 
205     DEBUGLOG("cluster_fd_callback, newfd=%d (errno=%d)\n", newfd, errno);
206     if (!newfd)
207     {
208           syslog(LOG_ERR, "error in accept: %m");
209           errno = EAGAIN;
210           return -1; /* Don't return an error or clvmd will close the listening FD */
211     }
212 
213     /* Check that the client is a member of the cluster
214        and reject if not.
215     */
216     if (gulm_name_from_csid((char *)&addr.sin6_addr, name) < 0)
217     {
218           syslog(LOG_ERR, "Got connect from non-cluster node %s\n",
219                  print_csid((char *)&addr.sin6_addr));
220           DEBUGLOG("Got connect from non-cluster node %s\n",
221                      print_csid((char *)&addr.sin6_addr));
222           close(newfd);
223 
224           errno = EAGAIN;
225           return -1;
226     }
227 
228     status = alloc_client(newfd, (char *)&addr.sin6_addr, new_client);
229     if (status)
230     {
231           DEBUGLOG("cluster_fd_callback, alloc_client failed, status = %d\n", status);
232           close(newfd);
233           /* See above... */
234           errno = EAGAIN;
235           return -1;
236     }
237     DEBUGLOG("cluster_fd_callback, returning %d, %p\n", newfd, *new_client);
238     return newfd;
239 }
240 
/*
 * Keep reading from 'fd' into 'buf' until 'len' bytes have arrived.
 *
 * Stops early when read() returns 0 or fails. At least one read() is
 * always issued, even for len == 0.
 *
 * Returns the number of bytes stored (possibly fewer than 'len' if
 * read() returned 0 part-way through), or the negative read() result on
 * error.
 */
static int really_read(int fd, char *buf, int len)
{
    int total = 0;

    for (;;)
    {
        int n = read(fd, buf + total, len - total);

        DEBUGLOG("really_read. got %d bytes\n", n);

        if (n < 0)
            return n;

        total += n;
        if (n == 0 || total >= len)
            return total;
    }
}
259 
260 
read_from_tcpsock(struct local_client * client,char * buf,int len,char * csid,struct local_client ** new_client)261 static int read_from_tcpsock(struct local_client *client, char *buf, int len, char *csid,
262                                    struct local_client **new_client)
263 {
264     struct sockaddr_in6 addr;
265     socklen_t slen = sizeof(addr);
266     struct clvm_header *header = (struct clvm_header *)buf;
267     int status;
268     uint32_t arglen;
269 
270     DEBUGLOG("read_from_tcpsock fd %d\n", client->fd);
271     *new_client = NULL;
272 
273     /* Get "csid" */
274     getpeername(client->fd, (struct sockaddr *)&addr, &slen);
275     memcpy(csid, &addr.sin6_addr, GULM_MAX_CSID_LEN);
276 
277     /* Read just the header first, then get the rest if there is any.
278      * Stream sockets, sigh.
279      */
280     status = really_read(client->fd, buf, sizeof(struct clvm_header));
281     if (status > 0)
282     {
283               int status2;
284 
285               arglen = ntohl(header->arglen);
286 
287               /* Get the rest */
288               if (arglen && arglen < GULM_MAX_CLUSTER_MESSAGE)
289               {
290                         status2 = really_read(client->fd, buf+status, arglen);
291                         if (status2 > 0)
292                                   status += status2;
293                         else
294                                   status = status2;
295               }
296     }
297 
298     DEBUGLOG("read_from_tcpsock, status = %d(errno = %d)\n", status, errno);
299 
300     /* Remove it from the hash table if there's an error, clvmd will
301        remove the socket from its lists and free the client struct */
302     if (status == 0 ||
303           (status < 0 && errno != EAGAIN && errno != EINTR))
304     {
305           char remcsid[GULM_MAX_CSID_LEN];
306 
307           memcpy(remcsid, csid, GULM_MAX_CSID_LEN);
308           close(client->fd);
309 
310           /* If the csid was mangled, then make sure we remove the right entry */
311           if (client->bits.net.flags)
312               remcsid[0] ^= 0x80;
313           dm_hash_remove_binary(sock_hash, remcsid, GULM_MAX_CSID_LEN);
314 
315           /* Tell cluster manager layer */
316           add_down_node(remcsid);
317     }
318     else {
319               gulm_add_up_node(csid);
320               /* Send it back to clvmd */
321               process_message(client, buf, status, csid);
322     }
323     return status;
324 }
325 
gulm_connect_csid(const char * csid,struct local_client ** newclient)326 int gulm_connect_csid(const char *csid, struct local_client **newclient)
327 {
328     int fd;
329     struct sockaddr_in6 addr;
330     int status;
331     int one = 1;
332 
333     DEBUGLOG("Connecting socket\n");
334     fd = socket(PF_INET6, SOCK_STREAM, 0);
335 
336     if (fd < 0)
337     {
338           syslog(LOG_ERR, "Unable to create new socket: %m");
339           return -1;
340     }
341 
342     addr.sin6_family = AF_INET6;
343     memcpy(&addr.sin6_addr, csid, GULM_MAX_CSID_LEN);
344     addr.sin6_port = htons(tcp_port);
345 
346     DEBUGLOG("Connecting socket %d\n", fd);
347     if (connect(fd, (struct sockaddr *)&addr, sizeof(struct sockaddr_in6)) < 0)
348     {
349           /* "Connection refused" is "normal" because clvmd may not yet be running
350            * on that node.
351            */
352           if (errno != ECONNREFUSED)
353           {
354               syslog(LOG_ERR, "Unable to connect to remote node: %m");
355           }
356           DEBUGLOG("Unable to connect to remote node: %s\n", strerror(errno));
357           close(fd);
358           return -1;
359     }
360 
361     /* Set Close-on-exec */
362     fcntl(fd, F_SETFD, 1);
363     setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(int));
364 
365     status = alloc_client(fd, csid, newclient);
366     if (status)
367           close(fd);
368     else
369           add_client(*newclient);
370 
371     /* If we can connect to it, it must be running a clvmd */
372     gulm_add_up_node(csid);
373     return status;
374 }
375 
376 /* Send a message to a known CSID */
tcp_send_message(void * buf,int msglen,const char * csid,const char * errtext)377 static int tcp_send_message(void *buf, int msglen, const char *csid, const char *errtext)
378 {
379     int status;
380     struct local_client *client;
381     char ourcsid[GULM_MAX_CSID_LEN];
382 
383     assert(csid);
384 
385     DEBUGLOG("tcp_send_message, csid = %s, msglen = %d\n", print_csid(csid), msglen);
386 
387     /* Don't connect to ourself */
388     get_our_gulm_csid(ourcsid);
389     if (memcmp(csid, ourcsid, GULM_MAX_CSID_LEN) == 0)
390           return msglen;
391 
392     client = dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
393     if (!client)
394     {
395           status = gulm_connect_csid(csid, &client);
396           if (status)
397               return -1;
398     }
399     DEBUGLOG("tcp_send_message, fd = %d\n", client->fd);
400 
401     return write(client->fd, buf, msglen);
402 }
403 
404 
gulm_cluster_send_message(void * buf,int msglen,const char * csid,const char * errtext)405 int gulm_cluster_send_message(void *buf, int msglen, const char *csid, const char *errtext)
406 {
407     int status=0;
408 
409     DEBUGLOG("cluster send message, csid = %p, msglen = %d\n", csid, msglen);
410 
411     /* If csid is NULL then send to all known (not just connected) nodes */
412     if (!csid)
413     {
414           void *context = NULL;
415           char loop_csid[GULM_MAX_CSID_LEN];
416 
417           /* Loop round all gulm-known nodes */
418           while (get_next_node_csid(&context, loop_csid))
419           {
420               status = tcp_send_message(buf, msglen, loop_csid, errtext);
421               if (status == 0 ||
422                     (status < 0 && (errno == EAGAIN || errno == EINTR)))
423                     break;
424           }
425     }
426     else
427     {
428 
429           status = tcp_send_message(buf, msglen, csid, errtext);
430     }
431     return status;
432 }
433 
434 /* To get our own IP address we get the locally bound address of the
435    socket that's talking to GULM in the assumption(eek) that it will
436    be on the "right" network in a multi-homed system */
get_our_ip_address(char * addr,int * family)437 static int get_our_ip_address(char *addr, int *family)
438 {
439           struct utsname info;
440 
441           uname(&info);
442           get_ip_address(info.nodename, addr);
443 
444           return 0;
445 }
446 
447 /* Public version of above for those that don't care what protocol
448    we're using */
get_our_gulm_csid(char * csid)449 void get_our_gulm_csid(char *csid)
450 {
451     static char our_csid[GULM_MAX_CSID_LEN];
452     static int got_csid = 0;
453 
454     if (!got_csid)
455     {
456           int family;
457 
458           memset(our_csid, 0, sizeof(our_csid));
459           if (get_our_ip_address(our_csid, &family))
460           {
461               got_csid = 1;
462           }
463     }
464     memcpy(csid, our_csid, GULM_MAX_CSID_LEN);
465 }
466 
/*
 * Build the IPv4-mapped IPv6 address (::ffff:a.b.c.d) for an IPv4 address.
 *
 * ip4 - the IPv4 address, in network byte order
 * ip6 - out: receives the 16-byte mapped address
 *
 * Only the s6_addr byte array is used, so this does not depend on the
 * non-standard s6_addr32 member.
 */
static void map_v4_to_v6(struct in_addr *ip4, struct in6_addr *ip6)
{
    /* 80 zero bits, 16 one bits, then the 32-bit IPv4 address */
    memset(ip6->s6_addr, 0, 10);
    ip6->s6_addr[10] = 0xff;
    ip6->s6_addr[11] = 0xff;
    memcpy(&ip6->s6_addr[12], &ip4->s_addr, sizeof(ip4->s_addr));
}
474 
475 /* Get someone else's IP address from DNS */
get_ip_address(const char * node,char * addr)476 int get_ip_address(const char *node, char *addr)
477 {
478     struct hostent *he;
479 
480     memset(addr, 0, GULM_MAX_CSID_LEN);
481 
482     // TODO: what do we do about multi-homed hosts ???
483     // CCSs ip_interfaces solved this but some bugger removed it.
484 
485     /* Try IPv6 first. The man page for gethostbyname implies that
486        it will lookup ip6 & ip4 names, but it seems not to */
487     he = gethostbyname2(node, AF_INET6);
488     if (he)
489     {
490           memcpy(addr, he->h_addr_list[0],
491                  he->h_length);
492     }
493     else
494     {
495           he = gethostbyname2(node, AF_INET);
496           if (!he)
497               return -1;
498           map_v4_to_v6((struct in_addr *)he->h_addr_list[0], (struct in6_addr *)addr);
499     }
500 
501     return 0;
502 }
503 
/*
 * Format a CSID for log messages as "[w0.w1.w2.w3]", where w0..w3 are
 * the first four unsigned int sized words of 'csid' printed in hex.
 *
 * 'csid' must provide at least 4 * sizeof(unsigned int) bytes; it does
 * not need any particular alignment because the words are copied out
 * with memcpy() rather than read through a cast pointer.
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call.
 */
char *print_csid(const char *csid)
{
    static char buf[128];
    unsigned int words[4];

    memcpy(words, csid, sizeof words);

    snprintf(buf, sizeof buf, "[%x.%x.%x.%x]",
             words[0], words[1], words[2], words[3]);

    return buf;
}
514