#!/bin/sh
# This code should (try to) follow Google's Shell Style Guide
# (https://google.github.io/styleguide/shellguide.html)
set -e

# Optional leading mode flag.
# `--wait` is deliberately left in "$1" (no shift), so the option parsing
# below still finds `-i` at "$2". `--direct-phys` is consumed, which moves
# the host interface into "$1".
if [ "$1" = "--wait" ]; then
  WAIT=1
elif [ "$1" = "--direct-phys" ]; then
  DIRECT_PHYS=1
  shift
fi

# The first remaining argument names the host interface (or a sub-command).
IFNAME=$1

# Optional switches are only recognised in the fixed order -i, -l, -a, each
# directly after the first argument. Consuming one shifts the remaining
# arguments left, so the next switch (or the guest name) is at "$2" again.

# Container-side interface name; a default is applied further down if this
# stays empty.
CONTAINER_IFNAME=
case "$2" in
  -i)
    CONTAINER_IFNAME=$3
    shift 2
    ;;
esac

# Local interface name (the "-l localinterfacename" switch).
case "$2" in
  -l)
    LOCAL_IFNAME=$3
    shift 2
    ;;
esac

# Address family flag handed to `ip`: "-4" (inet) unless "-a <family>" is
# given, e.g. "-a 6" for inet6.
FAMILY_FLAG="-4"
case "$2" in
  -a)
    FAMILY_FLAG="-$3"
    shift 2
    ;;
esac

GUESTNAME=$2
IPADDR=$3
MACADDR=$4

# The MAC argument may carry a VLAN suffix: "[macaddr]@vlan".
# MACADDR keeps the text before the first "@"; VLAN receives the text between
# the first "@" and the next one (or the end of the string). Without any "@"
# VLAN is empty and MACADDR is left untouched.
VLAN=
if [ "${MACADDR#*@}" != "$MACADDR" ]; then
  VLAN="${MACADDR#*@}"
  VLAN="${VLAN%%@*}"
  MACADDR="${MACADDR%%@*}"
fi

# Derive a MAC address from an arbitrary seed string.
# Arguments: $1 - seed string
# Outputs:   "02:" followed by the first five bytes of the md5 digest of the
#            seed plus a trailing newline, written as colon-separated hex.
# The seed is passed quoted through printf: an unquoted `echo $seed` is
# word-split and glob-expanded, so the result could change with whitespace in
# the seed or with the files present in the current directory.
mac_from_seed () {
  printf '%s\n' "$1" \
    | md5sum \
    | sed 's/^\(..\)\(..\)\(..\)\(..\)\(..\).*$/02:\1:\2:\3:\4:\5/'
}

# Did they ask to generate a custom MACADDR? A MAC argument of the form
# "U:<string>" is replaced by an address derived from <string>, so the same
# string always yields the same address.
case "$MACADDR" in
  U:*)
    macunique="${MACADDR#*:}"
    MACADDR=$(mac_from_seed "$macunique")
    ;;
esac


# Neither an address argument nor --wait: there is nothing to do, so print
# the usage summary on standard output and exit with status 1.
if [ -z "$IPADDR" ] && [ -z "$WAIT" ]; then
  printf '%s\n' \
    "Syntax:" \
    "pipework <hostinterface> [-i containerinterface] [-l localinterfacename] [-a addressfamily] <guest> <ipaddr>/<subnet>[@default_gateway] [macaddr][@vlan]" \
    "pipework <hostinterface> [-i containerinterface] [-l localinterfacename] <guest> dhcp [macaddr][@vlan]" \
    "pipework mac:<hostinterface_macaddress> [-i containerinterface] [-l localinterfacename] [-a addressfamily] <guest> <ipaddr>/<subnet>[@default_gateway] [macaddr][@vlan]" \
    "pipework mac:<hostinterface_macaddress> [-i containerinterface] [-l localinterfacename] <guest> dhcp [macaddr][@vlan]" \
    "pipework route <guest> <route_command>" \
    "pipework rule <guest> <rule_command>" \
    "pipework tc <guest> <tc_command>" \
    "pipework --wait [-i containerinterface]"
  exit 1
fi

# Tell whether a utility is available.
# Arguments: $1 - command name to look up
# Returns:   the status of `command -v`: zero when the name resolves,
#            non-zero otherwise. Nothing is printed.
# `command -v` is used instead of `which` or `type`; for the rationale see
# http://stackoverflow.com/questions/592620/check-if-a-program-exists-from-a-bash-script/677212#677212
# (Thanks to @chenhanxiao for pointing this out!)
installed () {
  command -v "$1" > /dev/null 2> /dev/null
}

# Print a message on standard error (Google Styleguide says error messages
# should go to standard error).
# Arguments: the words of the message; they are joined with spaces (the
#            first character of IFS) and followed by a newline.
# printf is used instead of echo so that a message beginning with "-n" or
# "-e", or containing backslashes, is printed literally rather than being
# interpreted as echo options or escapes.
warn () {
  printf '%s\n' "$*" >&2
}
# Abort the script with an error message.
# Arguments: $1 - exit status to terminate with
#            remaining arguments - the message, handed to warn
die () {
  exit_status=$1
  shift
  warn "$@"
  exit "$exit_status"
}

# A first argument of the form "mac:<address>" selects the host interface by
# hardware address: look through /sys/class/net for an interface whose
# `address` file matches and continue with that interface's name. Abort if
# no interface matches.
case "$IFNAME" in
  mac:*)
    mac_address=${IFNAME#mac:}
    # Compare in lower case so an address typed as AA:BB:... still matches;
    # sysfs is expected to report addresses in lower-case hex
    # (NOTE(review): confirm for unusual link types).
    mac_wanted=$(printf '%s\n' "$mac_address" | tr 'ABCDEF' 'abcdef')
    ifmatch=
    for iftest in /sys/class/net/*; do
      # Skip entries without an address file (including an unmatched glob).
      [ -f "$iftest/address" ] || continue
      if [ "$mac_wanted" = "$(cat "$iftest/address")" ]; then
        ifmatch=${iftest##*/}
        break
      fi
    done

    if [ -z "$ifmatch" ]; then
      die 1 "Mac address $mac_address specified for interface but no host interface matched."
    fi
    IFNAME=$ifmatch
    ;;
esac

# First step: determine type of first argument (bridge, physical interface...),
# Unless "--wait" is set (then skip the whole section)
# Sets IFTYPE (bridge, ipoib, phys, route, rule, tc or dummy) and, for
# bridges, BRTYPE (linux or openvswitch).
if [ -z "$WAIT" ]; then
  if [ -d "/sys/class/net/$IFNAME" ]; then
    # The interface already exists: classify it by inspecting the system.
    if [ -d "/sys/class/net/$IFNAME/bridge" ]; then
      IFTYPE=bridge
      BRTYPE=linux
    # -x/-F: the name must equal a whole output line, literally. Used as a
    # regular expression, a name such as "br.0" would also match "brx0".
    elif installed ovs-vsctl && ovs-vsctl list-br | grep -qxF -e "$IFNAME"; then
      IFTYPE=bridge
      BRTYPE=openvswitch
    elif [ "$(cat "/sys/class/net/$IFNAME/type")" -eq 32 ]; then # InfiniBand IPoIB interface type 32
      IFTYPE=ipoib
      # The IPoIB kernel module is fussy, set device name to ib0 if not overridden
      CONTAINER_IFNAME=${CONTAINER_IFNAME:-ib0}
      # For IPoIB the "@vlan" suffix is used as the partition key.
      PKEY=$VLAN
    else
      IFTYPE=phys
    fi
  else
    # No such interface yet: decide from the name prefix what to create, or
    # which sub-command was requested.
    case "$IFNAME" in
      br*)
        IFTYPE=bridge
        BRTYPE=linux
        ;;
      ovs*)
        if ! installed ovs-vsctl; then
          die 1 "Need OVS installed on the system to create an ovs bridge"
        fi
        IFTYPE=bridge
        BRTYPE=openvswitch
        ;;
      route*)
        IFTYPE=route
        ;;
      rule*)
        IFTYPE=rule
        ;;
      tc*)
        IFTYPE=tc
        ;;
      dummy*)
        IFTYPE=dummy
        ;;
      *) die 1 "I do not know how to setup interface $IFNAME." ;;
    esac
  fi
fi

# Set the default container interface name to eth1 if not already set
: "${CONTAINER_IFNAME:=eth1}"

# --wait mode: poll once per second until the interface either reports
# carrier or can be listed by `ip`, then exit successfully without doing
# anything else. All output of the checks is discarded.
if [ -n "$WAIT" ]; then
  while :; do
    # This first method works even without `ip` or `ifconfig` installed,
    # but doesn't work on older kernels (e.g. CentOS 6.X). See #128.
    if grep -q '^1$' "/sys/class/net/$CONTAINER_IFNAME/carrier"; then
      break
    fi
    # This method hopefully works on those older kernels.
    if ip link ls dev "$CONTAINER_IFNAME"; then
      break
    fi
    sleep 1
  done > /dev/null 2>&1
  exit 0
fi

# Reject option combinations that are not supported.

# A VLAN tag cannot be combined with a Linux bridge.
if [ "$IFTYPE" = bridge ] && [ "$BRTYPE" = linux ] && [ -n "$VLAN" ]; then
  die 1 "VLAN configuration currently unsupported for Linux bridge."
fi

# A custom MAC address cannot be combined with an IPoIB interface.
if [ "$IFTYPE" = ipoib ] && [ -n "$MACADDR" ]; then
  die 1 "MACADDR configuration unsupported for IPoIB interfaces."
fi

# Second step: find the guest (for now, we only support LXC containers)
# Scan the mount table for mounts of type "cgroup" whose options contain the
# word "devices"; when several qualify, the last one listed is kept.
while read _ mnt fstype options _; do
  if [ "$fstype" = "cgroup" ] && echo "$options" | grep -qw devices; then
    CGROUPMNT=$mnt
  fi
done < /proc/mounts

if [ -z "$CGROUPMNT" ]; then
    die 1 "Could not locate cgroup mount point."
fi

# Try to find a cgroup matching exactly the provided name.
# M holds the matching paths (one per line), N the number of matches.
M=$(find "$CGROUPMNT" -name "$GUESTNAME")
N=$(echo "$M" | wc -l)
# `echo "" | wc -l` still counts one line, so an empty result is forced to 0.
if [ -z "$M" ] ; then
    N=0
fi
case "$N" in
  0)
    # If we didn't find anything, try to lookup the container with Docker.
    if installed docker; then
      # Ask Docker for the container's PID, ID and name. While the reported
      # PID is 0, retry up to three times, one second apart.
      # NOTE(review): with `set -e` active (set at the top of this script), a
      # failing `docker inspect` in one of these assignments appears to abort
      # the script before the "not found" messages below are reached - confirm.
      RETRIES=3
      while [ "$RETRIES" -gt 0 ]; do
        DOCKERPID=$(docker inspect --format='{{ .State.Pid }}' "$GUESTNAME")
        DOCKERCID=$(docker inspect --format='{{ .ID }}' "$GUESTNAME")
        DOCKERCNAME=$(docker inspect --format='{{ .Name }}' "$GUESTNAME")

        [ "$DOCKERPID" != 0 ] && break
        sleep 1
        RETRIES=$((RETRIES - 1))
      done

      # Still PID 0 after all retries: give up.
      [ "$DOCKERPID" = 0 ] && {
        die 1 "Docker inspect returned invalid PID 0"
      }

      # The template rendered as "<no value>": treated as "Docker does not
      # know this container".
      [ "$DOCKERPID" = "<no value>" ] && {
        die 1 "Container $GUESTNAME not found, and unknown to Docker."
      }
    else
      die 1 "Container $GUESTNAME not found, and Docker not installed."
    fi
    ;;
  # Exactly one cgroup matched: nothing more to do here.
  1) true ;;
  # Two matches are accepted only if one of them mentions "lxc.monitor".
  2)  # LXC >=3.1.0 returns two entries from the cgroups mount instead of one.
      echo "$M" | grep -q "lxc\.monitor" || die 1 "Found more than one container matching $GUESTNAME."
      ;;
  *) die 1 "Found more than one container matching $GUESTNAME." ;;
esac

# only check IPADDR if we are not in a route mode
# The address argument is either a DHCP method (optionally followed by
# ":<options>") or "<ipaddr>/<subnet>[@default_gateway]".
case "$IFTYPE" in
  route|rule|tc)
    ;;
  *)
    case "$IPADDR" in
      # Let's check first if the user asked for DHCP allocation.
      dhcp|dhcp:*)
        # Use Docker-specific strategy to run the DHCP client
        # from the busybox image, in the network namespace of
        # the container.
        if [ -z "$DOCKERPID" ]; then
          warn "You asked for a Docker-specific DHCP method."
          warn "However, $GUESTNAME doesn't seem to be a Docker container."
          warn "Try to replace 'dhcp' with another option?"
          die 1 "Aborting."
        fi
        DHCP_CLIENT=${IPADDR%%:*}
        ;;
      udhcpc|udhcpc:*|udhcpc-f|udhcpc-f:*|dhcpcd|dhcpcd:*|dhclient|dhclient:*|dhclient-f|dhclient-f:*)
        DHCP_CLIENT=${IPADDR%%:*}
        # did they ask for the client to remain? (a "-f" suffix on the name)
        case "$DHCP_CLIENT" in
          *-f) DHCP_FOREGROUND=true ;;
          *) DHCP_FOREGROUND= ;;
        esac
        DHCP_CLIENT=${DHCP_CLIENT%-f}
        if ! installed "$DHCP_CLIENT"; then
          die 1 "You asked for DHCP client $DHCP_CLIENT, but I can't find it."
        fi
        ;;
      # Alright, no DHCP? Then let's see if we have a subnet *and* gateway.
      */*@*)
        GATEWAY="${IPADDR#*@}"
        GATEWAY="${GATEWAY%%@*}"
        IPADDR="${IPADDR%%@*}"
        ;;
      # No gateway? We need at least a subnet, anyway!
      */*)
        ;;
      # ... No? Then stop right here.
      *)
        warn "The IP address should include a netmask."
        die 1 "Maybe you meant $IPADDR/24 ?"
        ;;
    esac
    ;;
esac

# If a DHCP method was specified, extract the DHCP options: everything after
# the first ":" of the address argument.
if [ -n "$DHCP_CLIENT" ] && [ "${IPADDR#*:}" != "$IPADDR" ]; then
  DHCP_OPTIONS="${IPADDR#*:}"
fi

# Determine NSPID, the PID of a process inside the guest; it identifies the
# guest's network namespace from here on.
if [ -n "$DOCKERPID" ]; then
  NSPID=$DOCKERPID
else
  # First PID listed in the `tasks` file of the first matching cgroup.
  NSPID=$(head -n 1 "$(find "$CGROUPMNT" -name "$GUESTNAME" | head -n 1)/tasks")
  if [ -z "$NSPID" ]; then
    # it is an alternative way to get the pid
    NSPID=$(lxc-info -n  "$GUESTNAME" | grep PID | grep -Eo '[0-9]+')
    if [ -z "$NSPID" ]; then
      die 1 "Could not find a process inside container $GUESTNAME."
    fi
  fi
fi

# Check if an incompatible VLAN device already exists
# If "<ifname>.<vlan>" is already present it must be a VLAN device carrying
# the requested tag; otherwise abort. The path and the message must use
# "$VLAN": a literal ".VLAN" never names the device that is inspected.
if [ "$IFTYPE" = phys ] && [ -n "$VLAN" ] && [ -d "/sys/class/net/$IFNAME.$VLAN" ]; then
  if ! ip -d link show "$IFNAME.$VLAN" | grep -q "vlan.*id $VLAN"; then
    die 1 "$IFNAME.$VLAN already exists but is not a VLAN device for tag $VLAN"
  fi
fi

# Make the guest's network namespace reachable through `ip netns` under the
# name $NSPID: (re)create a symlink to it in /var/run/netns, replacing any
# stale entry of the same name.
if [ ! -d /var/run/netns ]; then
  mkdir -p /var/run/netns
fi
netns_link="/var/run/netns/$NSPID"
rm -f "$netns_link"
ln -s "/proc/$NSPID/ns/net" "$netns_link"

# Check if we need to create a bridge.
if [ "$IFTYPE" = bridge ] && [ ! -d "/sys/class/net/$IFNAME" ]; then
  case "$BRTYPE" in
    linux)
      # Try `ip` first (quietly); fall back to brctl if that fails.
      ip link add dev "$IFNAME" type bridge > /dev/null 2>&1 \
        || brctl addbr "$IFNAME"
      ip link set "$IFNAME" up
      ;;
    openvswitch)
      ovs-vsctl add-br "$IFNAME"
      ;;
  esac
fi

# Print the MTU found in `ip link show` output read from standard input:
# the word following the first "mtu" keyword. Prints nothing if there is none.
# Taking field 5 of every line is not enough: later lines can have a fifth
# field too (e.g. "link-netnsid"), which would end up in the value.
link_mtu () {
  awk '{
    for (i = 1; i < NF; i++) {
      if ($i == "mtu") {
        print $(i + 1)
        exit
      }
    }
  }'
}

# Remember the MTU of the host interface so that the interfaces created below
# use the same value. Skipped for the modes that have no host interface.
case "$IFTYPE" in
  route|dummy|rule|tc)
    ;;
  *)
    MTU=$(ip link show "$IFNAME" | link_mtu)
    ;;
esac

# If it's a bridge, we need to create a veth pair
# LOCAL_IFNAME is the end that gets attached to the bridge, GUEST_IFNAME the
# end that is moved into the guest later on.
if [ "$IFTYPE" = bridge ]; then
  if [ -z "$LOCAL_IFNAME" ]; then
    LOCAL_IFNAME="v${CONTAINER_IFNAME}pl${NSPID}"
  fi
  GUEST_IFNAME="v${CONTAINER_IFNAME}pg${NSPID}"
  # Does the link already exist?
  if ip link show "$LOCAL_IFNAME" >/dev/null 2>&1; then
    # link exists, is it in use?
    if ip link show "$LOCAL_IFNAME" up | grep -q "UP"; then
      # Report through die so the message goes to standard error like every
      # other fatal error in this script.
      die 1 "Link $LOCAL_IFNAME exists and is up"
    fi
    # delete the link so we can re-add it afterwards
    ip link del "$LOCAL_IFNAME"
  fi
  ip link add name "$LOCAL_IFNAME" mtu "$MTU" type veth peer name "$GUEST_IFNAME" mtu "$MTU"
  case "$BRTYPE" in
    linux)
      # Try `ip` first (quietly); fall back to brctl if that fails.
      (ip link set "$LOCAL_IFNAME" master "$IFNAME" > /dev/null 2>&1) || (brctl addif "$IFNAME" "$LOCAL_IFNAME")
      ;;
    openvswitch)
      # Add the port only if the bridge does not have it yet. -x/-F compare
      # the port name literally against whole lines instead of using it as a
      # regular expression. The ${VAR:+...} words add the VLAN tag and the
      # external-ids only when the corresponding value is set.
      if ! ovs-vsctl list-ports "$IFNAME" | grep -qxF -e "$LOCAL_IFNAME"; then
        ovs-vsctl add-port "$IFNAME" "$LOCAL_IFNAME" ${VLAN:+tag="$VLAN"} \
            -- set Interface "$LOCAL_IFNAME" \
                external-ids:pipework.interface="$LOCAL_IFNAME" \
                external-ids:pipework.bridge="$IFNAME" \
                ${DOCKERCID:+external-ids:pipework.containerid="$DOCKERCID"} \
                ${DOCKERCNAME:+external-ids:pipework.containername="$DOCKERCNAME"} \
                ${NSPID:+external-ids:pipework.nspid="$NSPID"} \
                ${VLAN:+external-ids:pipework.vlan="$VLAN"}
      fi
      ;;
  esac
  ip link set "$LOCAL_IFNAME" up
fi

# If it's a physical interface, create a macvlan subinterface
if [ "$IFTYPE" = phys ]; then
  # With a VLAN tag, work on the "<ifname>.<vlan>" device instead, creating
  # it first when it does not exist yet.
  if [ -n "$VLAN" ]; then
    vlan_ifname="$IFNAME.$VLAN"
    if [ ! -d "/sys/class/net/$vlan_ifname" ]; then
      ip link add link "$IFNAME" name "$vlan_ifname" mtu "$MTU" type vlan id "$VLAN"
    fi
    ip link set "$IFNAME" up
    IFNAME=$vlan_ifname
  fi

  # --direct-phys hands the interface itself to the guest; otherwise a
  # macvlan (bridge mode) device on top of it is created for the guest.
  if [ -n "$DIRECT_PHYS" ]; then
    GUEST_IFNAME=$IFNAME
  else
    GUEST_IFNAME="ph${NSPID}${CONTAINER_IFNAME}"
    ip link add link "$IFNAME" dev "$GUEST_IFNAME" mtu "$MTU" type macvlan mode bridge
  fi

  ip link set "$IFNAME" up
fi

# If it's an IPoIB interface, create a virtual IPoIB interface (the IPoIB
# equivalent of a macvlan device)
#
# Note: no macvlan subinterface nor Ethernet bridge can be created on top of an
# IPoIB interface. InfiniBand is not Ethernet. IPoIB is an IP layer on top of
# InfiniBand, without an intermediate Ethernet layer.
if [ "$IFTYPE" = ipoib ]; then
  if [ -n "$PKEY" ]; then
    # If a partition key is provided, use it: it becomes part of the device
    # name and PKEY is turned into the matching `ip link` arguments.
    GUEST_IFNAME="${IFNAME}.${PKEY}.${NSPID}"
    PKEY="pkey 0x$PKEY"
  else
    GUEST_IFNAME="${IFNAME}.${NSPID}"
  fi

  # $PKEY stays unquoted on purpose: it must split into two words
  # ("pkey" and the key) or vanish entirely when empty.
  ip link add link "$IFNAME" name "$GUEST_IFNAME" type ipoib $PKEY
  ip link set "$IFNAME" up
fi

# If it's a dummy interface, create it; it is moved into the guest later on.
if [ "$IFTYPE" = dummy ]; then
  GUEST_IFNAME="du${NSPID}${CONTAINER_IFNAME}"
  ip link add dev "$GUEST_IFNAME" type dummy
fi

# Final step: either run the requested sub-command inside the guest's network
# namespace, or move the prepared interface into the guest and configure it.

# If the `route` command was specified ...
if [ "$IFTYPE" = route ]; then
  # ... discard the first two arguments and pass the rest to the route command.
  shift 2
  ip netns exec "$NSPID" ip route "$@"
elif [ "$IFTYPE" = rule ]; then
  shift 2
  ip netns exec "$NSPID" ip rule "$@"
elif [ "$IFTYPE" = tc ]; then
  shift 2
  ip netns exec "$NSPID" tc "$@"
else
  # Otherwise, run normally: move the guest end into the namespace, give it
  # its final name and, if requested, its MAC address.
  ip link set "$GUEST_IFNAME" netns "$NSPID"
  ip netns exec "$NSPID" ip link set "$GUEST_IFNAME" name "$CONTAINER_IFNAME"
  if [ -n "$MACADDR" ]; then
    ip netns exec "$NSPID" ip link set dev "$CONTAINER_IFNAME" address "$MACADDR"
  fi

  # When using any of the DHCP methods, we start a DHCP client in the
  # network namespace of the container. With the 'dhcp' method, the
  # client used is taken from the Docker busybox image (therefore
  # requiring no specific client installed on the host). Other methods
  # use a locally installed client.
  # $DHCP_OPTIONS is left unquoted on purpose so that it splits into
  # separate client arguments.
  case "$DHCP_CLIENT" in
    dhcp)
      docker run --rm -d --net "container:$GUESTNAME" --cap-add NET_ADMIN \
             busybox udhcpc -i "$CONTAINER_IFNAME" -x "hostname:$GUESTNAME" \
             $DHCP_OPTIONS \
             >/dev/null
      ;;
    udhcpc)
      if [ -n "$DHCP_FOREGROUND" ]; then
        DHCP_OPTIONS="$DHCP_OPTIONS -f"
      fi
      ip netns exec "$NSPID" "$DHCP_CLIENT" -qi "$CONTAINER_IFNAME" \
                                            -x "hostname:$GUESTNAME" \
                                            -p "/var/run/udhcpc.$GUESTNAME.pid" \
                                            $DHCP_OPTIONS
      if [ -z "$DHCP_FOREGROUND" ]; then
        rm "/var/run/udhcpc.$GUESTNAME.pid"
      fi
      ;;
    dhclient)
      ip netns exec "$NSPID" "$DHCP_CLIENT" "$CONTAINER_IFNAME" \
                                            -pf "/var/run/dhclient.$GUESTNAME.pid" \
                                            -lf "/etc/dhclient/dhclient.$GUESTNAME.leases" \
                                            $DHCP_OPTIONS
      # kill dhclient after get ip address to prevent device be used after container close
      if [ -z "$DHCP_FOREGROUND" ]; then
        kill "$(cat "/var/run/dhclient.$GUESTNAME.pid")"
        rm "/var/run/dhclient.$GUESTNAME.pid"
      fi
      ;;
    dhcpcd)
      ip netns exec "$NSPID" "$DHCP_CLIENT" -q "$CONTAINER_IFNAME" -h "$GUESTNAME"
      ;;
    "")
      # Static configuration.
      if installed ipcalc; then
        # NOTE(review): eval executes whatever ipcalc prints; the next line
        # relies on that output being an assignment of BROADCAST. Confirm the
        # installed ipcalc flavour prints "BROADCAST=<addr>" for -b.
        eval "$(ipcalc -b "$IPADDR")"
        ip netns exec "$NSPID" ip "$FAMILY_FLAG" addr add "$IPADDR" brd "$BROADCAST" dev "$CONTAINER_IFNAME"
      else
        ip netns exec "$NSPID" ip "$FAMILY_FLAG" addr add "$IPADDR" dev "$CONTAINER_IFNAME"
      fi

      if [ -n "$GATEWAY" ]; then
        # Best effort: there may be no default route to delete.
        ip netns exec "$NSPID" ip "$FAMILY_FLAG" route delete default >/dev/null 2>&1 || true
      fi
      ip netns exec "$NSPID" ip "$FAMILY_FLAG" link set "$CONTAINER_IFNAME" up
      if [ -n "$GATEWAY" ]; then
        # Host route to the gateway, needed when it is not reachable yet.
        # The prefix length of a single host depends on the address family:
        # 128 bits for IPv6 ("-a 6"), 32 bits otherwise.
        case "$FAMILY_FLAG" in
          -6) gateway_host="$GATEWAY/128" ;;
          *) gateway_host="$GATEWAY/32" ;;
        esac
        ip netns exec "$NSPID" ip "$FAMILY_FLAG" route get "$GATEWAY" >/dev/null 2>&1 \
          || ip netns exec "$NSPID" ip "$FAMILY_FLAG" route add "$gateway_host" dev "$CONTAINER_IFNAME"
        ip netns exec "$NSPID" ip "$FAMILY_FLAG" route replace default via "$GATEWAY" dev "$CONTAINER_IFNAME"
      fi
      ;;
  esac

  # Give our ARP neighbors a nudge about the new interface
  if installed arping; then
    # Strip the "/<subnet>" part; failures of arping are ignored on purpose.
    IPADDR=${IPADDR%%/*}
    ip netns exec "$NSPID" arping -c 1 -A -I "$CONTAINER_IFNAME" "$IPADDR" > /dev/null 2>&1 || true
  else
    warn "Warning: arping not found; interface may not be immediately reachable"
  fi
fi
# Remove NSPID to avoid `ip netns` catch it.
rm -f "/var/run/netns/$NSPID"

# vim: set tabstop=2 shiftwidth=2 softtabstop=2 expandtab :