slirp: Forward ICMP echo requests via unprivileged sockets

Linux 3.0 gained support for unprivileged ICMP ping sockets. Use this
feature to forward guest pings to the outside world. The host admin has to
set the net.ipv4.ping_group_range sysctl to grant access to those sockets.
For example, to allow ping for the users group (GID 100):

echo 100 100 > /proc/sys/net/ipv4/ping_group_range

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
diff --git a/ip_icmp.c b/ip_icmp.c
index 42df178..2ccf19d 100644
--- a/ip_icmp.c
+++ b/ip_icmp.c
@@ -62,6 +62,52 @@
     /* ADDR MASK REPLY (18) */ 0
 };
 
+void icmp_init(Slirp *slirp)
+{
+    struct socket *head = &slirp->icmp; /* empty list: head links to itself */
+    head->so_next = head->so_prev = slirp->icmp_last_so = head;
+}
+
+/* Forward the guest's echo request in m (IP header of hlen bytes) through a
+ * host ICMP datagram socket kept in so->s; -1 means no socket could be opened. */
+static int icmp_send(struct socket *so, struct mbuf *m, int hlen)
+{
+    struct ip *ip = mtod(m, struct ip *);
+    struct sockaddr_in addr;
+
+    so->s = qemu_socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP);
+    if (so->s == -1) {
+        return -1;
+    }
+    so->so_m = m;
+    so->so_faddr = ip->ip_dst;
+    so->so_laddr = ip->ip_src;
+    so->so_iptos = ip->ip_tos;
+    so->so_type = IPPROTO_ICMP;
+    so->so_state = SS_ISFCONNECTED;
+    so->so_expire = curtime + SO_EXPIRE;
+    insque(so, &so->slirp->icmp);
+
+    memset(&addr, 0, sizeof(addr)); /* no uninitialized fields reach sendto() */
+    addr.sin_family = AF_INET;
+    addr.sin_addr = so->so_faddr;
+    if (sendto(so->s, m->m_data + hlen, m->m_len - hlen, 0,
+               (struct sockaddr *)&addr, sizeof(addr)) == -1) {
+        int err = errno; /* saved: the calls below may overwrite errno */
+        DEBUG_MISC((dfd, "icmp_input icmp sendto tx errno = %d-%s\n", err,
+                    strerror(err)));
+        icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(err));
+        icmp_detach(so); /* the failure was reported to the guest; drop so */
+    }
+    return 0; /* 0 also covers a failed sendto(): it was handled above */
+}
+
+void icmp_detach(struct socket *so) /* tear down one ICMP relay socket */
+{
+    closesocket(so->s); /* close the host-side descriptor */
+    sofree(so); /* then release so itself; sofree() is defined elsewhere */
+}
+
 /*
  * Process a received ICMP message.
  */
@@ -98,7 +144,6 @@
     DEBUG_ARG("icmp_type = %d", icp->icmp_type);
     switch (icp->icmp_type) {
     case ICMP_ECHO:
-        icp->icmp_type = ICMP_ECHOREPLY;
         ip->ip_len += hlen; /* since ip_input subtracts this */
         if (ip->ip_dst.s_addr == slirp->vhost_addr.s_addr) {
             icmp_reflect(m);
@@ -109,6 +154,9 @@
             struct sockaddr_in addr;
             if ((so = socreate(slirp)) == NULL)
                 goto freeit;
+            if (icmp_send(so, m, hlen) == 0) {
+                return;
+            }
             if (udp_attach(so) == -1) {
                 DEBUG_MISC((dfd, "icmp_input udp_attach errno = %d-%s\n", errno,
                             strerror(errno)));
@@ -331,6 +379,7 @@
     m->m_len -= hlen;
     icp = mtod(m, struct icmp *);
 
+    icp->icmp_type = ICMP_ECHOREPLY;
     icp->icmp_cksum = 0;
     icp->icmp_cksum = cksum(m, ip->ip_len - hlen);
 
@@ -361,3 +410,39 @@
 
     (void)ip_output((struct socket *)NULL, m);
 }
+
+/* Invoked by the slirp poll loop when so->s is readable: receive the host's
+ * answer over the ICMP part of the saved request so->so_m, pass the result
+ * on through icmp_reflect() or icmp_error(), and always detach so. */
+void icmp_receive(struct socket *so)
+{
+    struct mbuf *m = so->so_m;
+    struct ip *ip = mtod(m, struct ip *);
+    int hlen = ip->ip_hl << 2;
+    u_char error_code;
+    struct icmp *icp;
+    int id, len, err;
+
+    /* Step over the IP header so recv() writes onto the ICMP message. */
+    m->m_data += hlen;
+    m->m_len -= hlen;
+    icp = mtod(m, struct icmp *);
+
+    id = icp->icmp_id;
+    len = recv(so->s, icp, m->m_len, 0);
+    err = (len == -1) ? errno : 0; /* errno is only meaningful on failure */
+    icp->icmp_id = id; /* restore the id that was saved before recv() */
+
+    m->m_data -= hlen;
+    m->m_len += hlen;
+
+    if (len == -1 || len == 0) {
+        error_code = err == ENETUNREACH ? ICMP_UNREACH_NET : ICMP_UNREACH_HOST;
+        DEBUG_MISC((dfd, " udp icmp rx errno = %d-%s\n", err, strerror(err)));
+        icmp_error(so->so_m, ICMP_UNREACH, error_code, 0, strerror(err));
+    } else {
+        icmp_reflect(so->so_m);
+        so->so_m = NULL; /* Don't m_free() it again! */
+    }
+    icmp_detach(so);
+}
diff --git a/ip_icmp.h b/ip_icmp.h
index f035dc5..7c006ff 100644
--- a/ip_icmp.h
+++ b/ip_icmp.h
@@ -153,9 +153,12 @@
      (type) == ICMP_IREQ || (type) == ICMP_IREQREPLY ||             \
      (type) == ICMP_MASKREQ || (type) == ICMP_MASKREPLY)
 
+void icmp_init(Slirp *slirp);
 void icmp_input(struct mbuf *, int);
 void icmp_error(struct mbuf *msrc, u_char type, u_char code, int minsize,
                 const char *message);
 void icmp_reflect(struct mbuf *);
+void icmp_receive(struct socket *so);
+void icmp_detach(struct socket *so);
 
 #endif
diff --git a/ip_input.c b/ip_input.c
index 74879fd..2f4e463 100644
--- a/ip_input.c
+++ b/ip_input.c
@@ -56,6 +56,7 @@
     slirp->ipq.ip_link.next = slirp->ipq.ip_link.prev = &slirp->ipq.ip_link;
     udp_init(slirp);
     tcp_init(slirp);
+    icmp_init(slirp);
 }
 
 /*
diff --git a/misc.c b/misc.c
index a3643fd..a24d2cb 100644
--- a/misc.c
+++ b/misc.c
@@ -392,4 +392,17 @@
         monitor_printf(mon, "%15s %5d %5d %5d\n", inet_ntoa(dst_addr),
                        ntohs(dst_port), so->so_rcv.sb_cc, so->so_snd.sb_cc);
     }
+
+    for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so->so_next) {
+        n = snprintf(buf, sizeof(buf), "  ICMP[%d sec]",
+                     (so->so_expire - curtime) / 1000);
+        src.sin_addr = so->so_laddr;
+        dst_addr = so->so_faddr;
+        memset(&buf[n], ' ', 19 - n);
+        buf[19] = 0;
+        monitor_printf(mon, "%s %3d %15s  -    ", buf, so->s,
+                       src.sin_addr.s_addr ? inet_ntoa(src.sin_addr) : "*");
+        monitor_printf(mon, "%15s  -    %5d %5d\n", inet_ntoa(dst_addr),
+                       so->so_rcv.sb_cc, so->so_snd.sb_cc);
+    }
 }
diff --git a/slirp.c b/slirp.c
index 549ee1d..55f35a1 100644
--- a/slirp.c
+++ b/slirp.c
@@ -376,6 +376,30 @@
                 UPD_NFDS(so->s);
             }
         }
+
+        /*
+         * ICMP sockets
+         */
+        for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so_next) {
+            so_next = so->so_next;
+
+            /*
+             * See if it's timed out
+             */
+            if (so->so_expire) {
+                if (so->so_expire <= curtime) {
+                    icmp_detach(so);
+                    continue;
+                } else {
+                    do_slowtimo = 1; /* Let socket expire */
+                }
+            }
+
+            if (so->so_state & SS_ISFCONNECTED) {
+                FD_SET(so->s, readfds);
+                UPD_NFDS(so->s);
+            }
+        }
     }
 
     *pnfds = nfds;
@@ -542,6 +566,17 @@
                     sorecvfrom(so);
                 }
             }
+
+            /*
+             * Check incoming ICMP replies.
+             */
+            for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so_next) {
+                so_next = so->so_next;
+
+                if (so->s != -1 && FD_ISSET(so->s, readfds)) {
+                    icmp_receive(so);
+                }
+            }
         }
 
         /*
diff --git a/slirp.h b/slirp.h
index 7a0f2da..c8b97cf 100644
--- a/slirp.h
+++ b/slirp.h
@@ -152,6 +152,7 @@
 #include "tcp_var.h"
 #include "tcpip.h"
 #include "udp.h"
+#include "ip_icmp.h"
 #include "mbuf.h"
 #include "sbuf.h"
 #include "socket.h"
@@ -218,6 +219,10 @@
     struct socket udb;
     struct socket *udp_last_so;
 
+    /* icmp states */
+    struct socket icmp;
+    struct socket *icmp_last_so;
+
     /* tftp states */
     char *tftp_prefix;
     struct tftp_session tftp_sessions[TFTP_SESSIONS_MAX];
diff --git a/socket.c b/socket.c
index c920000..00b6bc2 100644
--- a/socket.c
+++ b/socket.c
@@ -65,6 +65,8 @@
         slirp->tcp_last_so = &slirp->tcb;
     } else if (so == slirp->udp_last_so) {
         slirp->udp_last_so = &slirp->udb;
+    } else if (so == slirp->icmp_last_so) {
+        slirp->icmp_last_so = &slirp->icmp;
     }
     m_free(so->so_m);