/* get-next-event loop * * Copyright (C) 1997 Angelos D. Keromytis. * Copyright (C) 1998-2002, 2013 D. Hugh Redelmeier * Copyright (C) 2003-2008 Michael C Richardson * Copyright (C) 2003-2010 Paul Wouters * Copyright (C) 2008-2009 David McCullough * Copyright (C) 2009 Avesh Agarwal * Copyright (C) 2010 Tuomo Soini * Copyright (C) 2012-2013 Paul Wouters * Copyright (C) 2013 Wolfgang Nothdurft * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. See . * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * */ #include #include #include #include #include #include #include #include #include #include #include #ifdef SOLARIS # include /* for Solaris 2.6: defines SIOCGIFCONF */ #endif #include #include #include #include /* only used for forensic poll call */ #include #include #include #include #include #include #include #include #if defined(IP_RECVERR) && defined(MSG_ERRQUEUE) # include /* for __u8, __u32 */ # include # include /* struct iovec */ #endif #include #include "sysdep.h" #include "socketwrapper.h" #include "constants.h" #include "defs.h" #include "state.h" #include "id.h" #include "x509.h" #include "certs.h" #include "connections.h" /* needs id.h */ #include "kernel.h" /* for no_klips; needs connections.h */ #include "log.h" #include "server.h" #include "timer.h" #include "packet.h" #include "demux.h" /* needs packet.h */ #include "rcv_whack.h" #include "keys.h" #include "adns.h" /* needs */ #include "dnskey.h" /* needs keys.h and adns.h */ #include "whack.h" /* for RC_LOG_SERIOUS */ #include "pluto_crypt.h" /* cryptographic helper functions */ #include "udpfromto.h" #include #include #include "kameipsec.h" #include "nat_traversal.h" #include "lsw_select.h" /* * Server main loop and socket initialization routines. */ static const int on = TRUE; /* by-reference parameter; constant, we hope */ bool no_retransmits = FALSE; char *pluto_vendorid; static pid_t addconn_child_pid = 0; /* list of interface devices */ struct iface_list interface_dev; /* control (whack) socket */ int ctl_fd = NULL_FD; /* file descriptor of control (whack) socket */ struct sockaddr_un ctl_addr = { .sun_family = AF_UNIX, #if defined(HAS_SUN_LEN) .sun_len = sizeof(struct sockaddr_un), #endif .sun_path = DEFAULT_CTLBASE CTL_SUFFIX }; struct sockaddr_un info_addr = { .sun_family = AF_UNIX, #if defined(HAS_SUN_LEN) .sun_len = sizeof(struct sockaddr_un), #endif .sun_path = DEFAULT_CTLBASE INFO_SUFFIX }; /* Initialize the control socket. * Note: this is called very early, so little infrastructure is available. * It is important that the socket is created before the original * Pluto process returns. */ err_t init_ctl_socket(void) { err_t failed = NULL; LIST_INIT(&interface_dev); delete_ctl_socket(); /* preventative medicine */ ctl_fd = safe_socket(AF_UNIX, SOCK_STREAM, 0); if (ctl_fd == -1) { failed = "create"; } else if (fcntl(ctl_fd, F_SETFD, FD_CLOEXEC) == -1) { failed = "fcntl FD+CLOEXEC"; } else if (setsockopt(ctl_fd, SOL_SOCKET, SO_REUSEADDR, (const void *)&on, sizeof(on)) < 0) { failed = "setsockopt"; } else { /* to keep control socket secure, use umask */ #ifdef PLUTO_GROUP_CTL mode_t ou = umask(~(S_IRWXU | S_IRWXG)); #else mode_t ou = umask(~S_IRWXU); #endif if (bind(ctl_fd, (struct sockaddr *)&ctl_addr, offsetof(struct sockaddr_un, sun_path) + strlen(ctl_addr.sun_path)) < 0) failed = "bind"; umask(ou); } #ifdef PLUTO_GROUP_CTL { struct group *g; g = getgrnam("pluto"); if (g != NULL) { if (fchown(ctl_fd, -1, g->gr_gid) != 0) { loglog(RC_LOG_SERIOUS, "Can not chgrp ctl fd(%d) to gid=%d: %s\n", ctl_fd, g->gr_gid, strerror(errno)); } } } #endif /* 5 is a haphazardly chosen limit for the backlog. * Rumour has it that this is the max on BSD systems. */ if (failed == NULL && listen(ctl_fd, 5) < 0) failed = "listen() on"; return failed == NULL ? NULL : builddiag( "could not %s control socket: %d %s", failed, errno, strerror(errno)); } void delete_ctl_socket(void) { /* Is noting failure useful? Not when used as preventative medicine. */ unlink(ctl_addr.sun_path); } bool listening = FALSE; /* should we pay attention to IKE messages? */ struct iface_port *interfaces = NULL; /* public interfaces */ /* Initialize the interface sockets. */ static void mark_ifaces_dead(void) { struct iface_port *p; for (p = interfaces; p != NULL; p = p->next) p->change = IFN_DELETE; } static void free_dead_iface_dev(struct iface_dev *id) { if (--id->id_count == 0) { pfree(id->id_vname); pfree(id->id_rname); LIST_REMOVE(id, id_entry); pfree(id); } } static void free_dead_ifaces(void) { struct iface_port *p; bool some_dead = FALSE, some_new = FALSE; for (p = interfaces; p != NULL; p = p->next) { if (p->change == IFN_DELETE) { ipstr_buf b; libreswan_log("shutting down interface %s/%s %s:%d", p->ip_dev->id_vname, p->ip_dev->id_rname, ipstr(&p->ip_addr, &b), p->port); some_dead = TRUE; } else if (p->change == IFN_ADD) { some_new = TRUE; } } if (some_dead) { struct iface_port **pp; release_dead_interfaces(); delete_states_dead_interfaces(); for (pp = &interfaces; (p = *pp) != NULL; ) { if (p->change == IFN_DELETE) { struct iface_dev *id; *pp = p->next; /* advance *pp */ close(p->fd); id = p->ip_dev; pfree(p); free_dead_iface_dev(id); } else { pp = &p->next; /* advance pp */ } } } /* this must be done after the release_dead_interfaces * in case some to the newly unoriented connections can * become oriented here. */ if (some_dead || some_new) check_orientations(); } void free_ifaces(void) { mark_ifaces_dead(); free_dead_ifaces(); } struct raw_iface *static_ifn = NULL; int create_socket(struct raw_iface *ifp, const char *v_name, int port) { int fd = socket(addrtypeof(&ifp->addr), SOCK_DGRAM, IPPROTO_UDP); int fcntl_flags; if (fd < 0) { log_errno((e, "socket() in process_raw_ifaces()")); return -1; } /* Set socket Nonblocking */ if ((fcntl_flags = fcntl(fd, F_GETFL)) >= 0) { if (!(fcntl_flags & O_NONBLOCK)) { fcntl_flags |= O_NONBLOCK; fcntl(fd, F_SETFL, fcntl_flags); } } if (fcntl(fd, F_SETFD, FD_CLOEXEC) == -1) { log_errno((e, "fcntl(,, FD_CLOEXEC) in process_raw_ifaces()")); close(fd); return -1; } if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (const void *)&on, sizeof(on)) < 0) { log_errno((e, "setsockopt SO_REUSEADDR in process_raw_ifaces()")); close(fd); return -1; } /* To improve error reporting. See ip(7). */ #if defined(IP_RECVERR) && defined(MSG_ERRQUEUE) if (setsockopt(fd, SOL_IP, IP_RECVERR, (const void *)&on, sizeof(on)) < 0) { log_errno((e, "setsockopt IP_RECVERR in process_raw_ifaces()")); close(fd); return -1; } #endif /* With IPv6, there is no fragmentation after * it leaves our interface. PMTU discovery * is mandatory but doesn't work well with IKE (why?). * So we must set the IPV6_USE_MIN_MTU option. * See draft-ietf-ipngwg-rfc2292bis-01.txt 11.1 */ #ifdef IPV6_USE_MIN_MTU /* YUCK: not always defined */ if (addrtypeof(&ifp->addr) == AF_INET6 && setsockopt(fd, SOL_SOCKET, IPV6_USE_MIN_MTU, (const void *)&on, sizeof(on)) < 0) { log_errno((e, "setsockopt IPV6_USE_MIN_MTU in process_raw_ifaces()")); close(fd); return -1; } #endif #if defined(linux) && defined(NETKEY_SUPPORT) if (kern_interface == USE_NETKEY) { struct sadb_x_policy policy; int level, opt; zero(&policy); policy.sadb_x_policy_len = sizeof(policy) / IPSEC_PFKEYv2_ALIGN; policy.sadb_x_policy_exttype = SADB_X_EXT_POLICY; policy.sadb_x_policy_type = IPSEC_POLICY_BYPASS; policy.sadb_x_policy_dir = IPSEC_DIR_INBOUND; policy.sadb_x_policy_id = 0; if (addrtypeof(&ifp->addr) == AF_INET6) { level = IPPROTO_IPV6; opt = IPV6_IPSEC_POLICY; } else { level = IPPROTO_IP; opt = IP_IPSEC_POLICY; } if (setsockopt(fd, level, opt, &policy, sizeof(policy)) < 0) { log_errno((e, "setsockopt IPSEC_POLICY in process_raw_ifaces()")); close(fd); return -1; } policy.sadb_x_policy_dir = IPSEC_DIR_OUTBOUND; if (setsockopt(fd, level, opt, &policy, sizeof(policy)) < 0) { log_errno((e, "setsockopt IPSEC_POLICY in process_raw_ifaces()")); close(fd); return -1; } } #endif setportof(htons(port), &ifp->addr); if (bind(fd, sockaddrof(&ifp->addr), sockaddrlenof(&ifp->addr)) < 0) { ipstr_buf b; log_errno((e, "bind() for %s/%s %s:%u in process_raw_ifaces()", ifp->name, v_name, ipstr(&ifp->addr, &b), (unsigned) port)); close(fd); return -1; } setportof(htons(pluto_port), &ifp->addr); #if defined(HAVE_UDPFROMTO) /* we are going to use udpfromto.c, so initialize it */ udpfromto_init(fd); #endif /* poke a hole for IKE messages in the IPsec layer */ if (kernel_ops->exceptsocket) { if (!(*kernel_ops->exceptsocket)(fd, AF_INET)) { close(fd); return -1; } } return fd; } void find_ifaces(void) { mark_ifaces_dead(); if (kernel_ops->process_ifaces) { #if !defined(__CYGWIN32__) kernel_ops->process_ifaces(find_raw_ifaces4()); kernel_ops->process_ifaces(find_raw_ifaces6()); #endif kernel_ops->process_ifaces(static_ifn); } free_dead_ifaces(); /* ditch remaining old entries */ if (interfaces == NULL) loglog(RC_LOG_SERIOUS, "no public interfaces found"); } void show_ifaces_status(void) { struct iface_port *p; for (p = interfaces; p != NULL; p = p->next) { ipstr_buf b; whack_log(RC_COMMENT, "interface %s/%s %s@%d", p->ip_dev->id_vname, p->ip_dev->id_rname, ipstr(&p->ip_addr, &b), p->port); } whack_log(RC_COMMENT, " "); /* spacer */ } void show_debug_status(void) { whack_log(RC_COMMENT, "debug %s", bitnamesof(debug_bit_names, cur_debugging)); } static volatile sig_atomic_t sighupflag = FALSE; static void huphandler(int sig UNUSED) { sighupflag = TRUE; } static volatile sig_atomic_t sigtermflag = FALSE; static void termhandler(int sig UNUSED) { sigtermflag = TRUE; } static volatile sig_atomic_t sigchildflag = FALSE; static void childhandler(int sig UNUSED) { sigchildflag = TRUE; } /* perform wait4() on all children */ static void reapchildren(void) { pid_t child; int status; struct rusage r; sigchildflag = FALSE; errno = 0; while ((child = wait3(&status, WNOHANG, &r)) > 0) { /* got a child to reap */ if (adns_reapchild(child, status)) continue; if (child == addconn_child_pid) { DBG(DBG_CONTROLMORE, DBG_log("reaped addconn helper child")); addconn_child_pid = 0; continue; } /* Threads are created instead of child processes when using LIBNSS */ libreswan_log("child pid=%d (status=%d) is not my child!", child, status); } if (child == -1) { libreswan_log("reapchild failed with errno=%d %s", errno, strerror(errno)); } } /* call_server listens for incoming ISAKMP packets and Whack messages, * and handles timer events. */ void call_server(void) { struct iface_port *ifp; /* catch SIGHUP and SIGTERM */ { int r; struct sigaction act; act.sa_handler = &huphandler; sigemptyset(&act.sa_mask); act.sa_flags = 0; /* no SA_ONESHOT, no SA_RESTART, no nothing */ r = sigaction(SIGHUP, &act, NULL); passert(r == 0); act.sa_handler = &termhandler; r = sigaction(SIGTERM, &act, NULL); passert(r == 0); act.sa_handler = &childhandler; act.sa_flags = SA_RESTART; r = sigaction(SIGCHLD, &act, NULL); passert(r == 0); } /* do_whacklisten() is now done by the addconn fork */ /* * fork to issue the command "ipsec addconn --autoall" * (or vfork() when fork() isn't available, eg on embedded platforms * without MMU, like uClibc */ { /* find a pathname to the addconn program */ const char *addconn_path = NULL; static const char addconn_name[] = "addconn"; char addconn_path_space[4096]; /* plenty long? */ ssize_t n = 0; #if !(defined(macintosh) || (defined(__MACH__) && defined(__APPLE__))) { /* The program will be in the same directory as Pluto, * so we use the sympolic link /proc/self/exe to * tell us of the path prefix. */ n = readlink("/proc/self/exe", addconn_path_space, sizeof(addconn_path_space)); if (n < 0) { # ifdef __uClibc__ /* on some nommu we have no proc/self/exe, try without path */ *addconn_path_space = '\0'; n = 0; # else exit_log_errno((e, "readlink(\"/proc/self/exe\") failed in call_server()")); # endif } } #else /* This is wrong. Should end up in a resource_dir on MacOSX -- Paul */ addconn_path = "/usr/local/libexec/ipsec/addconn"; #endif if ((size_t)n > sizeof(addconn_path_space) - sizeof(addconn_name)) exit_log("path to %s is too long", addconn_name); while (n > 0 && addconn_path_space[n - 1] != '/') n--; strcpy(addconn_path_space + n, addconn_name); addconn_path = addconn_path_space; if (access(addconn_path, X_OK) < 0) exit_log_errno((e, "%s missing or not executable", addconn_path)); char *newargv[] = { DISCARD_CONST(char *, "addconn"), DISCARD_CONST(char *, "--ctlbase"), DISCARD_CONST(char *, ctl_addr.sun_path), DISCARD_CONST(char *, "--autoall"), NULL }; char *newenv[] = { NULL }; #ifdef HAVE_NO_FORK addconn_child_pid = vfork(); /* for better, for worse, in sickness and health..... */ #else addconn_child_pid = fork(); #endif if (addconn_child_pid == 0) { /* child */ sleep(1); DBG(DBG_CONTROLMORE, DBG_log("calling addconn helper using execve")); execve(addconn_path, newargv, newenv); _exit(42); } /* parent continues */ } for (;; ) { lsw_fd_set readfds; lsw_fd_set writefds; int ndes; /* wait for next interesting thing */ for (;; ) { long next_time = next_event(); /* time to any pending timer event */ int maxfd = ctl_fd; if (sigtermflag) exit_pluto(0); if (sighupflag) { /* Ignorant folks think poking any daemon with SIGHUP * is polite. We catch it and tell them otherwise. * There is one use: unsticking a hung recvfrom. * This sticking happens sometimes -- kernel bug? */ sighupflag = FALSE; libreswan_log( "Pluto ignores SIGHUP -- perhaps you want \"whack --listen\""); } if (sigchildflag) reapchildren(); LSW_FD_ZERO(&readfds); LSW_FD_ZERO(&writefds); LSW_FD_SET(ctl_fd, &readfds); /* the only write file-descriptor of interest */ if (adns_qfd != NULL_FD && unsent_ADNS_queries) { if (maxfd < adns_qfd) maxfd = adns_qfd; LSW_FD_SET(adns_qfd, &writefds); } if (adns_afd != NULL_FD) { if (maxfd < adns_afd) maxfd = adns_afd; LSW_FD_SET(adns_afd, &readfds); } #ifdef KLIPS if (kern_interface != NO_KERNEL) { int fd = *kernel_ops->async_fdp; if (kernel_ops->process_queue) kernel_ops->process_queue(); if (maxfd < fd) maxfd = fd; passert(!LSW_FD_ISSET(fd, &readfds)); LSW_FD_SET(fd, &readfds); } #endif if (listening) { for (ifp = interfaces; ifp != NULL; ifp = ifp->next) { if (maxfd < ifp->fd) maxfd = ifp->fd; passert(!LSW_FD_ISSET(ifp->fd, &readfds)); LSW_FD_SET(ifp->fd, &readfds); } } /* see if helpers need attention */ enumerate_crypto_helper_response_sockets(&readfds); if (no_retransmits || next_time < 0) { /* select without timer */ ndes = lsw_select(maxfd + 1, &readfds, &writefds, NULL, NULL); } else if (next_time == 0) { /* timer without select: there is a timer event pending, * and it should fire now so don't bother to do the select. */ ndes = 0; /* signify timer expiration */ } else { /* select with timer */ struct timeval tm; tm.tv_sec = next_time; tm.tv_usec = 0; ndes = lsw_select(maxfd + 1, &readfds, &writefds, NULL, &tm); } if (ndes != -1) break; /* success */ if (errno != EINTR) exit_log_errno((e, "select() failed in call_server()")); /* retry if terminated by signal */ } DBG(DBG_CONTROL, DBG_log(" ")); /* figure out what is interesting */ /* do FD's before events are processed */ if (ndes > 0) { /* at least one file descriptor is ready */ if (adns_qfd != NULL_FD && LSW_FD_ISSET(adns_qfd, &writefds)) { passert(ndes > 0); send_unsent_ADNS_queries(); passert(GLOBALS_ARE_RESET()); ndes--; } if (adns_afd != NULL_FD && LSW_FD_ISSET(adns_afd, &readfds)) { passert(ndes > 0); DBG(DBG_CONTROL, DBG_log("*received adns message")); handle_adns_answer(); passert(GLOBALS_ARE_RESET()); ndes--; } #ifdef KLIPS if (kern_interface != NO_KERNEL && LSW_FD_ISSET(*kernel_ops->async_fdp, &readfds)) { passert(ndes > 0); DBG(DBG_CONTROL, DBG_log("*received kernel message")); kernel_ops->process_msg(); passert(GLOBALS_ARE_RESET()); ndes--; } #endif for (ifp = interfaces; ifp != NULL; ifp = ifp->next) { if (LSW_FD_ISSET(ifp->fd, &readfds)) { /* comm_handle will print DBG_CONTROL intro, * with more info than we have here. */ passert(ndes > 0); comm_handle(ifp); passert(GLOBALS_ARE_RESET()); ndes--; } } if (LSW_FD_ISSET(ctl_fd, &readfds)) { passert(ndes > 0); DBG(DBG_CONTROL, DBG_log("*received whack message")); whack_handle(ctl_fd); passert(GLOBALS_ARE_RESET()); ndes--; } /* note we process helper things last on purpose */ { int helpers = pluto_crypto_helper_response_ready( &readfds); DBG(DBG_CONTROL, DBG_log("* processed %d messages from cryptographic helpers\n", helpers)); ndes -= helpers; } passert(ndes == 0); } if (next_event() == 0 && !no_retransmits) { /* timer event ready */ DBG(DBG_CONTROL, DBG_log("*time to handle event")); handle_timer_event(); passert(GLOBALS_ARE_RESET()); } } } /* Process any message on the MSG_ERRQUEUE * * This information is generated because of the IP_RECVERR socket option. * The API is sparsely documented, and may be LINUX-only, and only on * fairly recent versions at that (hence the conditional compilation). * * - ip(7) describes IP_RECVERR * - recvmsg(2) describes MSG_ERRQUEUE * - readv(2) describes iovec * - cmsg(3) describes how to process auxilliary messages * * ??? we should link this message with one we've sent * so that the diagnostic can refer to that negotiation. * * ??? how long can the messge be? * * ??? poll(2) has a very incomplete description of the POLL* events. * We assume that POLLIN, POLLOUT, and POLLERR are all we need to deal with * and that POLLERR will be on iff there is a MSG_ERRQUEUE message. * * We have to code around a couple of surprises: * * - Select can say that a socket is ready to read from, and * yet a read will hang. It turns out that a message available on the * MSG_ERRQUEUE will cause select to say something is pending, but * a normal read will hang. poll(2) can tell when a MSG_ERRQUEUE * message is pending. * * This is dealt with by calling check_msg_errqueue after select * has indicated that there is something to read, but before the * read is performed. check_msg_errqueue will return TRUE if there * is something left to read. * * - A write to a socket may fail because there is a pending MSG_ERRQUEUE * message, without there being anything wrong with the write. This * makes for confusing diagnostics. * * To avoid this, we call check_msg_errqueue before a write. True, * there is a race condition (a MSG_ERRQUEUE message might arrive * between the check and the write), but we should eliminate many * of the problematic events. To narrow the window, the poll(2) * will await until an event happens (in the case or a write, * POLLOUT; this should be benign for POLLIN). */ #if defined(IP_RECVERR) && defined(MSG_ERRQUEUE) bool check_msg_errqueue(const struct iface_port *ifp, short interest) { struct pollfd pfd; pfd.fd = ifp->fd; pfd.events = interest | POLLPRI | POLLOUT; while (pfd.revents = 0, poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLERR)) { u_int8_t buffer[3000]; /* hope that this is big enough */ union { struct sockaddr sa; struct sockaddr_in sa_in4; struct sockaddr_in6 sa_in6; } from; ssize_t packet_len; struct msghdr emh; struct iovec eiov; union { /* force alignment (not documented as necessary) */ struct cmsghdr ecms; /* how much space is enough? */ unsigned char space[256]; } ecms_buf; struct cmsghdr *cm; char fromstr[sizeof(" for message to port 65536") + INET6_ADDRSTRLEN]; struct state *sender = NULL; zero(&from.sa); emh.msg_name = &from.sa; /* ??? filled in? */ emh.msg_namelen = sizeof(from); emh.msg_iov = &eiov; emh.msg_iovlen = 1; emh.msg_control = &ecms_buf; emh.msg_controllen = sizeof(ecms_buf); emh.msg_flags = 0; eiov.iov_base = buffer; /* see readv(2) */ eiov.iov_len = sizeof(buffer); packet_len = recvmsg(ifp->fd, &emh, MSG_ERRQUEUE); if (packet_len == -1) { log_errno((e, "recvmsg(,, MSG_ERRQUEUE) on %s failed in comm_handle", ifp->ip_dev->id_rname)); break; } else if (packet_len == sizeof(buffer)) { libreswan_log( "MSG_ERRQUEUE message longer than %lu bytes; truncated", (unsigned long) sizeof(buffer)); } else { sender = find_sender((size_t) packet_len, buffer); } DBG_cond_dump(DBG_ALL, "rejected packet:\n", buffer, packet_len); DBG_cond_dump(DBG_ALL, "control:\n", emh.msg_control, emh.msg_controllen); /* ??? Andi Kleen and misc documentation * suggests that name will have the original destination * of the packet. We seem to see msg_namelen == 0. * Andi says that this is a kernel bug and has fixed it. * Perhaps in 2.2.18/2.4.0. */ passert(emh.msg_name == &from.sa); DBG_cond_dump(DBG_ALL, "name:\n", emh.msg_name, emh.msg_namelen); fromstr[0] = '\0'; /* usual case :-( */ switch (from.sa.sa_family) { char as[INET6_ADDRSTRLEN]; case AF_INET: if (emh.msg_namelen == sizeof(struct sockaddr_in)) snprintf(fromstr, sizeof(fromstr), " for message to %s port %u", inet_ntop(from.sa.sa_family, &from.sa_in4.sin_addr, as, sizeof(as)), ntohs(from.sa_in4.sin_port)); break; case AF_INET6: if (emh.msg_namelen == sizeof(struct sockaddr_in6)) snprintf(fromstr, sizeof(fromstr), " for message to %s port %u", inet_ntop(from.sa.sa_family, &from.sa_in6.sin6_addr, as, sizeof(as)), ntohs(from.sa_in6.sin6_port)); break; } for (cm = CMSG_FIRSTHDR(&emh) ; cm != NULL ; cm = CMSG_NXTHDR(&emh, cm)) { if (cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) { /* ip(7) and recvmsg(2) specify: * ee_origin is SO_EE_ORIGIN_ICMP for ICMP * or SO_EE_ORIGIN_LOCAL for locally generated errors. * ee_type and ee_code are from the ICMP header. * ee_info is the discovered MTU for EMSGSIZE errors * ee_data is not used. * * ??? recvmsg(2) says "SOCK_EE_OFFENDER" but * means "SO_EE_OFFENDER". The OFFENDER is really * the router that complained. As such, the port * is meaningless. */ /* ??? cmsg(3) claims that CMSG_DATA returns * void *, but RFC 2292 and /usr/include/bits/socket.h * say unsigned char *. The manual is being fixed. */ struct sock_extended_err *ee = (void *)CMSG_DATA(cm); const char *offstr = "unspecified"; char offstrspace[INET6_ADDRSTRLEN]; char orname[50]; if (cm->cmsg_len > CMSG_LEN(sizeof(struct sock_extended_err))) { const struct sockaddr *offender = SO_EE_OFFENDER(ee); switch (offender->sa_family) { case AF_INET: offstr = inet_ntop( offender->sa_family, &((const struct sockaddr_in *) offender)->sin_addr, offstrspace, sizeof(offstrspace)); break; case AF_INET6: offstr = inet_ntop( offender->sa_family, &((const struct sockaddr_in6 *)offender)->sin6_addr, offstrspace, sizeof(offstrspace)); break; default: offstr = "unknown"; break; } } switch (ee->ee_origin) { case SO_EE_ORIGIN_NONE: snprintf(orname, sizeof(orname), "none"); break; case SO_EE_ORIGIN_LOCAL: snprintf(orname, sizeof(orname), "local"); break; case SO_EE_ORIGIN_ICMP: snprintf(orname, sizeof(orname), "ICMP type %d code %d (not authenticated)", ee->ee_type, ee->ee_code); break; case SO_EE_ORIGIN_ICMP6: snprintf(orname, sizeof(orname), "ICMP6 type %d code %d (not authenticated)", ee->ee_type, ee->ee_code); break; default: snprintf(orname, sizeof(orname), "invalid origin %lu", (unsigned long) ee->ee_origin); break; } if (packet_len == 1 && buffer[0] == 0xff && (cur_debugging & DBG_NATT) == 0) { /* don't log NAT-T keepalive related errors unless NATT debug is * enabled */ } else { struct state *old_state = cur_state; cur_state = sender; /* note dirty trick to suppress ~ at start of format * if we know what state to blame. */ libreswan_log((sender != NULL) + "~" "ERROR: asynchronous network error report on %s (sport=%d)" "%s" ", complainant %s" ": %s" " [errno %lu, origin %s" /* ", pad %d, info %ld" */ /* ", data %ld" */ "]", ifp->ip_dev->id_rname, ifp->port, fromstr, offstr, strerror(ee->ee_errno), (unsigned long) ee->ee_errno, orname /* , ee->ee_pad, (unsigned long)ee->ee_info */ /* , (unsigned long)ee->ee_data */ ); cur_state = old_state; } } else if (cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_PKTINFO) { } else { /* .cmsg_len is a kernel_size_t(!), but the value * certainly ought to fit in an unsigned long. */ libreswan_log( "unknown cmsg: level %d, type %d, len %lu", cm->cmsg_level, cm->cmsg_type, (unsigned long) cm->cmsg_len); } } } return (pfd.revents & interest) != 0; } #endif /* defined(IP_RECVERR) && defined(MSG_ERRQUEUE) */ /* send_ike_msg logic is broken into layers. * The rest of the system thinks it is simple. * We have three entrypoints that control options * for reporting write failure and actions on resending (fragment?): * send_ike_msg(), resend_ike_v1_msg(), and send_keepalive(). * * The first two call send_or_resend_ike_msg(). * That handles an IKE message. * It calls send_frags() if the message needs to be fragmented. * Otherwise it calls send_packet() to send it in one gulp. * * send_frags() breaks an IKE message into fragments and sends * them by send_packet(). * * send_keepalive() calls send_packet() directly: uses a special * tiny packet; non-ESP marker does not apply; logging on write error * is suppressed. * * send_packet() sends a UDP packet, possibly prefixed by a non-ESP Marker * for NATT. It accepts two chunks because this avoids double-copying. */ static bool send_packet(struct state *st, const char *where, bool just_a_keepalive, const u_int8_t *aptr, size_t alen, const u_int8_t *bptr, size_t blen) { /* NOTE: on system with limited stack, buf could be made static */ u_int8_t buf[MAX_OUTPUT_UDP_SIZE]; /* Each fragment, if we are doing NATT, needs a non-ESP_Marker prefix. * natt_bonus is the size of the addition (0 if not needed). */ const size_t natt_bonus = !just_a_keepalive && st->st_interface->ike_float ? NON_ESP_MARKER_SIZE : 0; const u_int8_t *ptr; unsigned long len = natt_bonus + alen + blen; ssize_t wlen; if (len > MAX_OUTPUT_UDP_SIZE) { DBG_log("send_ike_msg(): really too big %lu bytes", (unsigned long) len); return FALSE; } if (len != alen) { /* copying required */ /* 1. non-ESP Marker (0x00 octets) */ memset(buf, 0x00, natt_bonus); /* 2. chunk a */ memcpy(buf + natt_bonus, aptr, alen); /* 3. chunk b */ memcpy(buf + natt_bonus + alen, bptr, blen); ptr = buf; } else { ptr = aptr; } DBG(DBG_CONTROL | DBG_RAW, { ipstr_buf b; DBG_log("sending %lu bytes for %s through %s:%d to %s:%u (using #%lu)", (unsigned long) len, where, st->st_interface->ip_dev->id_rname, st->st_interface->port, ipstr(&st->st_remoteaddr, &b), st->st_remoteport, st->st_serialno); }); DBG(DBG_RAW, DBG_dump(NULL, ptr, len)); setportof(htons(st->st_remoteport), &st->st_remoteaddr); #if defined(IP_RECVERR) && defined(MSG_ERRQUEUE) (void) check_msg_errqueue(st->st_interface, POLLOUT); #endif /* defined(IP_RECVERR) && defined(MSG_ERRQUEUE) */ wlen = sendto(st->st_interface->fd, ptr, len, 0, sockaddrof(&st->st_remoteaddr), sockaddrlenof(&st->st_remoteaddr)); if (wlen != (ssize_t)len) { if (just_a_keepalive) { ipstr_buf b; log_errno((e, "sendto on %s to %s:%u failed in %s", st->st_interface->ip_dev->id_rname, ipstr(&st->st_remoteaddr, &b), st->st_remoteport, where)); } return FALSE; } /* Send a duplicate packet when this impair is enabled - used for testing */ if (DBGP(IMPAIR_JACOB_TWO_TWO)) { /* sleep for half a second, and second another packet */ usleep(500000); ipstr_buf b; DBG_log("JACOB 2-2: resending %lu bytes for %s through %s:%d to %s:%u:", (unsigned long) len, where, st->st_interface->ip_dev->id_rname, st->st_interface->port, ipstr(&st->st_remoteaddr, &b), st->st_remoteport); wlen = sendto(st->st_interface->fd, ptr, len, 0, sockaddrof(&st->st_remoteaddr), sockaddrlenof(&st->st_remoteaddr)); if (wlen != (ssize_t)len) { if (just_a_keepalive) { log_errno((e, "sendto on %s to %s:%u failed in %s", st->st_interface->ip_dev->id_rname, ipstr(&st->st_remoteaddr, &b), st->st_remoteport, where)); } return FALSE; } } return TRUE; } /* * non-IETF magic voodoo we need to consider for interop: * - www.cisco.com/en/US/docs/ios/sec_secure_connectivity/configuration/guide/sec_fragment_ike_pack.html * - www.cisco.com/en/US/docs/ios-xml/ios/sec_conn_ikevpn/configuration/15-mt/sec-fragment-ike-pack.pdf * - msdn.microsoft.com/en-us/library/cc233452.aspx * - iOS/Apple racoon source ipsec-164.9 at www.opensource.apple.com (frak length 1280) * - stock racoon source (frak length 552) */ static bool send_frags(struct state *st, const char *where) { unsigned int fragnum = 0; /* Each fragment, if we are doing NATT, needs a non-ESP_Marker prefix. * natt_bonus is the size of the addition (0 if not needed). */ const size_t natt_bonus = st->st_interface->ike_float ? NON_ESP_MARKER_SIZE : 0; /* We limit fragment packets to ISAKMP_FRAG_MAXLEN octets. * max_data_len is the maximum data length that will fit within it. */ const size_t max_data_len = ((st->st_connection->addr_family == AF_INET) ? ISAKMP_FRAG_MAXLEN_IPv4 : ISAKMP_FRAG_MAXLEN_IPv6) - (natt_bonus + NSIZEOF_isakmp_hdr + NSIZEOF_isakmp_ikefrag); u_int8_t *packet_cursor = st->st_tpacket.ptr; size_t packet_remainder_len = st->st_tpacket.len; /* BUG: this code does not use the marshalling code * in packet.h to translate between wire and host format. * This is dangerous. The following assertion should * fail in most cases where this cheat won't work. */ passert(sizeof(struct isakmp_hdr) == NSIZEOF_isakmp_hdr && sizeof(struct isakmp_ikefrag) == NSIZEOF_isakmp_ikefrag); while (packet_remainder_len > 0) { u_int8_t frag_prefix[NSIZEOF_isakmp_hdr + NSIZEOF_isakmp_ikefrag]; const size_t data_len = packet_remainder_len > max_data_len ? max_data_len : packet_remainder_len; const size_t fragpl_len = NSIZEOF_isakmp_ikefrag + data_len; const size_t isakmppl_len = NSIZEOF_isakmp_hdr + fragpl_len; fragnum++; /* emit isakmp header derived from original */ { struct isakmp_hdr *ih = (struct isakmp_hdr*) frag_prefix; memcpy(ih, st->st_tpacket.ptr, NSIZEOF_isakmp_hdr); ih->isa_np = ISAKMP_NEXT_IKE_FRAGMENTATION; /* one octet */ /* Do we need to set any of ISAKMP_FLAG_ENCRYPTION, * ISAKMP_FLAGS_MSG_R or ISAKMP_FLAGS_IKE_I ? * seems there might be disagreement between Cisco and Microsoft. * st->st_suspended_md->hdr.isa_flags; TODO must this be set? */ ih->isa_flags &= ~ISAKMP_FLAG_ENCRYPTION; ih->isa_length = htonl(isakmppl_len); } /* Append the ike frag header */ { struct isakmp_ikefrag *fh = (struct isakmp_ikefrag*) (frag_prefix + NSIZEOF_isakmp_hdr); fh->isafrag_np = 0; /* must be zero */ fh->isafrag_reserved = 0; /* reserved at this time, must be zero */ fh->isafrag_length = htons(fragpl_len); fh->isafrag_id = htons(1); /* In theory required to be unique, in practise not needed? */ fh->isafrag_number = fragnum; /* one byte, no htons() call needed */ fh->isafrag_flags = packet_remainder_len == data_len ? ISAKMP_FRAG_LAST : 0; } DBG(DBG_CONTROL, DBG_log("sending IKE fragment id '%d', number '%u'%s", 1, /* hard coded for now, seems to be what all the cool implementations do */ fragnum, packet_remainder_len == data_len ? " (last)" : "")); if (!send_packet(st, where, FALSE, frag_prefix, NSIZEOF_isakmp_hdr + NSIZEOF_isakmp_ikefrag, packet_cursor, data_len)) return FALSE; packet_remainder_len -= data_len; packet_cursor += data_len; } return TRUE; } static bool send_or_resend_ike_msg(struct state *st, const char *where, bool resending) { size_t len = st->st_tpacket.len; /* Each fragment, if we are doing NATT, needs a non-ESP_Marker prefix. * natt_bonus is the size of the addition (0 if not needed). */ const size_t natt_bonus = st->st_interface->ike_float ? NON_ESP_MARKER_SIZE : 0; /* decide of whether we're to fragment - IKEv1 only, draft-smyslov-ipsecme-ikev2-fragmentation not implemented yet */ if (!st->st_ikev2 && st->st_state != STATE_MAIN_I1 && len + natt_bonus >= (st->st_connection->addr_family == AF_INET ? ISAKMP_FRAG_MAXLEN_IPv4 : ISAKMP_FRAG_MAXLEN_IPv6) && ((resending && (st->st_connection->policy & POLICY_IKE_FRAG_ALLOW) && st->st_seen_fragvid) || ((st->st_connection->policy & POLICY_IKE_FRAG_FORCE) || st->st_seen_fragments))) return send_frags(st, where); else return send_packet(st, where, FALSE, st->st_tpacket.ptr, st->st_tpacket.len, NULL, (size_t) 0); } bool send_ike_msg(struct state *st, const char *where) { return send_or_resend_ike_msg(st, where, FALSE); } bool resend_ike_v1_msg(struct state *st, const char *where) { return send_or_resend_ike_msg(st, where, TRUE); } /* send keepalive is special in two ways: * We don't want send errors logged (too noisy). * We don't want the packet prefixed with a non-ESP Marker. */ bool send_keepalive(struct state *st, const char *where) { static const unsigned char ka_payload = 0xff; return send_packet(st, where, TRUE, &ka_payload, sizeof(ka_payload), NULL, (size_t) 0); }