Browse Source

device: add support for configuring bonding devices

Supported options:

- ports: member devices
- policy: bonding policy
	supported values:
	- balance-rr
	- active-backup
	- balance-xor
	- broadcast
	- 802.3ad
	- balance-tlb
	- balance-alb
- xmit_hash_policy: slave selection tx hash policy
	supported values:
	- layer2
	- layer2+3
	- layer3+4
	- encap2+3
	- encap3+4
- all_ports_active: allow receiving on inactive ports
- min_links: minimum number of active links
- ad_actor_system: LACPDU exchange MAC address
- ad_actor_sys_prio: LACPDU priority value
- ad_select: 802.3ad aggregation logic
	supported values:
	- stable
	- bandwidth
	- count
- lacp_rate: 802.3ad LACPDU packet rate
	supported values:
	- slow (every 30 seconds)
	- fast (every second)
- packets_per_port: number of packets before switching
  ports (balance-rr mode).
- lp_interval: number of seconds between sent learning packets
- dynamic_lb: distribute traffic according to port load
- resend_igmp: number if IGMP membership updates after failover event
- num_peer_notif: number of tx unsolicited ARP/NA after failover event
- primary: name of the primary port
- primary_reselect: primary port reselection policy
	supported values:
	- always
	- better
	- failure
- failover_mac: MAC address selection policy
	supported values:
	- none
	- active
	- follow
- monitor_mode: select ARP or MII link monitor:
	supported values:
	- arp
	- mii
- monitor_interval: link monitor update interval (msec)
- arp_target: ARP monitor target IP address (list)
- arp_all_targets: all targets must be reachable to consider the link valid
- arp_validate: ARP validation policy
	supported values:
	- none
	- active
	- backup
	- all
	- filter
	- filter_active
	- filter_backup
- use_carrier: use carrier status instead of MII ioctl result
- updelay: delay before enabling port after MII link up event (msec)
- downdelay: delay before disabling port after MII link down event (msec)

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Felix Fietkau 2 years ago
parent
commit
5ba9744aac
7 changed files with 914 additions and 1 deletions
  1. 1 1
      CMakeLists.txt
  2. 746 0
      bonding.c
  3. 7 0
      config/network
  4. 10 0
      system-dummy.c
  5. 97 0
      system-linux.c
  6. 10 0
      system.c
  7. 43 0
      system.h

+ 1 - 1
CMakeLists.txt

@@ -21,7 +21,7 @@ SET(SOURCES
 	iprule.c proto.c proto-static.c proto-shell.c
 	config.c device.c bridge.c veth.c vlan.c alias.c
 	macvlan.c ubus.c vlandev.c wireless.c
-	extdev.c)
+	extdev.c bonding.c)
 
 
 SET(LIBS

+ 746 - 0
bonding.c

@@ -0,0 +1,746 @@
+/*
+ * netifd - network interface daemon
+ * Copyright (C) 2021 Felix Fietkau <nbd@nbd.name>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "netifd.h"
+#include "device.h"
+#include "system.h"
+
+struct bonding_device {
+	struct device dev;
+	device_state_cb set_state;
+
+	struct blob_attr *port_list;
+	struct vlist_tree ports;
+	int n_present;
+	int n_failed;
+
+	struct bonding_port *primary_port;
+	struct uloop_timeout retry;
+
+	struct bonding_config config;
+	struct blob_attr *config_data;
+	bool has_macaddr;
+	bool force_active;
+	bool active;
+};
+
+struct bonding_port {
+	struct vlist_node node;
+	struct bonding_device *bdev;
+	struct device_user dev;
+	bool set_primary;
+	bool present;
+	bool active;
+	char name[];
+};
+
+enum {
+	BOND_ATTR_PORTS,
+
+	BOND_ATTR_POLICY,
+	BOND_ATTR_XMIT_HASH_POLICY,
+	BOND_ATTR_ALL_PORTS_ACTIVE,
+
+	BOND_ATTR_MIN_LINKS,
+	BOND_ATTR_AD_ACTOR_SYSTEM,
+	BOND_ATTR_AD_ACTOR_SYS_PRIO,
+	BOND_ATTR_AD_SELECT,
+	BOND_ATTR_LACP_RATE,
+
+	BOND_ATTR_PACKETS_PER_PORT,
+	BOND_ATTR_LP_INTERVAL,
+	BOND_ATTR_DYNAMIC_LB,
+	BOND_ATTR_RESEND_IGMP,
+
+	BOND_ATTR_NUM_PEER_NOTIF,
+	BOND_ATTR_PRIMARY,
+	BOND_ATTR_PRIMARY_RESELECT,
+	BOND_ATTR_FAILOVER_MAC,
+
+	BOND_ATTR_MON_MODE,
+	BOND_ATTR_MON_INTERVAL,
+	BOND_ATTR_ARP_TARGET,
+	BOND_ATTR_ARP_ALL_TARGETS,
+	BOND_ATTR_ARP_VALIDATE,
+	BOND_ATTR_USE_CARRIER,
+	BOND_ATTR_UPDELAY,
+	BOND_ATTR_DOWNDELAY,
+
+	__BOND_ATTR_MAX,
+};
+
+static const struct blobmsg_policy bonding_attrs[__BOND_ATTR_MAX] = {
+	[BOND_ATTR_PORTS] = { "ports", BLOBMSG_TYPE_ARRAY },
+	[BOND_ATTR_POLICY] = { "policy", BLOBMSG_TYPE_STRING },
+	[BOND_ATTR_XMIT_HASH_POLICY] = { "xmit_hash_policy", BLOBMSG_TYPE_STRING },
+	[BOND_ATTR_ALL_PORTS_ACTIVE] = { "all_ports_active", BLOBMSG_TYPE_BOOL },
+	[BOND_ATTR_MIN_LINKS] = { "min_links", BLOBMSG_TYPE_INT32 },
+	[BOND_ATTR_AD_ACTOR_SYSTEM] = { "ad_actor_system", BLOBMSG_TYPE_STRING },
+	[BOND_ATTR_AD_ACTOR_SYS_PRIO] = { "ad_actor_sys_prio", BLOBMSG_TYPE_INT32 },
+	[BOND_ATTR_AD_SELECT] = { "ad_select", BLOBMSG_TYPE_STRING },
+	[BOND_ATTR_LACP_RATE] = { "lacp_rate", BLOBMSG_TYPE_STRING },
+	[BOND_ATTR_PACKETS_PER_PORT] = { "packets_per_port", BLOBMSG_TYPE_INT32 },
+	[BOND_ATTR_LP_INTERVAL] = { "lp_interval", BLOBMSG_TYPE_INT32 },
+	[BOND_ATTR_DYNAMIC_LB] = { "dynamic_lb", BLOBMSG_TYPE_BOOL },
+	[BOND_ATTR_RESEND_IGMP] = { "resend_igmp", BLOBMSG_TYPE_INT32 },
+	[BOND_ATTR_NUM_PEER_NOTIF] = { "num_peer_notif", BLOBMSG_TYPE_INT32 },
+	[BOND_ATTR_PRIMARY] = { "primary", BLOBMSG_TYPE_STRING },
+	[BOND_ATTR_PRIMARY_RESELECT] = { "primary_reselect", BLOBMSG_TYPE_STRING },
+	[BOND_ATTR_FAILOVER_MAC] = { "failover_mac", BLOBMSG_TYPE_STRING },
+	[BOND_ATTR_MON_MODE] = { "monitor_mode", BLOBMSG_TYPE_STRING },
+	[BOND_ATTR_MON_INTERVAL] = { "monitor_interval", BLOBMSG_TYPE_INT32 },
+	[BOND_ATTR_ARP_TARGET] = { "arp_target", BLOBMSG_TYPE_ARRAY },
+	[BOND_ATTR_ARP_ALL_TARGETS] = { "arp_all_targets", BLOBMSG_TYPE_BOOL },
+	[BOND_ATTR_ARP_VALIDATE] = { "arp_validate", BLOBMSG_TYPE_STRING },
+	[BOND_ATTR_USE_CARRIER] = { "use_carrier", BLOBMSG_TYPE_BOOL },
+	[BOND_ATTR_UPDELAY] = { "updelay", BLOBMSG_TYPE_INT32 },
+	[BOND_ATTR_DOWNDELAY] = { "downdelay", BLOBMSG_TYPE_INT32 },
+};
+
+static const struct uci_blob_param_info bonding_attr_info[__BOND_ATTR_MAX] = {
+	[BOND_ATTR_PORTS] = { .type = BLOBMSG_TYPE_STRING },
+	[BOND_ATTR_ARP_TARGET] = { .type = BLOBMSG_TYPE_STRING },
+};
+
+static const struct uci_blob_param_list bonding_attr_list = {
+	.n_params = __BOND_ATTR_MAX,
+	.params = bonding_attrs,
+	.info = bonding_attr_info,
+
+	.n_next = 1,
+	.next = { &device_attr_list },
+};
+
+static void
+bonding_reset_primary(struct bonding_device *bdev)
+{
+	struct bonding_port *bp;
+
+	bdev->primary_port = NULL;
+	if (!bdev->has_macaddr)
+		bdev->dev.settings.flags &= ~DEV_OPT_MACADDR;
+
+	vlist_for_each_element(&bdev->ports, bp, node) {
+		uint8_t *macaddr;
+
+		if (!bp->present)
+			continue;
+
+		if (bdev->primary_port && !bp->set_primary)
+			continue;
+
+		bdev->primary_port = bp;
+		if (bdev->has_macaddr)
+			continue;
+
+		if (bp->dev.dev->settings.flags & DEV_OPT_MACADDR)
+			macaddr = bp->dev.dev->settings.macaddr;
+		else
+			macaddr = bp->dev.dev->orig_settings.macaddr;
+		memcpy(bdev->dev.settings.macaddr, macaddr, 6);
+		bdev->dev.settings.flags |= DEV_OPT_MACADDR;
+	}
+}
+
+static int
+bonding_disable_port(struct bonding_port *bp, bool keep_dev)
+{
+	struct bonding_device *bdev = bp->bdev;
+
+	if (!bp->present || !bp->active)
+		return 0;
+
+	bp->active = false;
+
+	system_bonding_set_port(&bdev->dev, bp->dev.dev, false, bp->set_primary);
+	if (!keep_dev)
+		device_release(&bp->dev);
+
+	return 0;
+}
+
+static void
+bonding_remove_port(struct bonding_port *bp)
+{
+	struct bonding_device *bdev = bp->bdev;
+
+	if (!bp->present)
+		return;
+
+	if (bdev->dev.active)
+		bonding_disable_port(bp, false);
+
+	bp->present = false;
+	bp->bdev->n_present--;
+
+	if (bp == bdev->primary_port)
+		bonding_reset_primary(bdev);
+
+	bdev->force_active = false;
+	if (bdev->n_present == 0)
+		device_set_present(&bdev->dev, false);
+}
+
+static int
+bonding_set_active(struct bonding_device *bdev, bool active)
+{
+	int ret;
+
+	if (bdev->active == active)
+		return 0;
+
+	ret = system_bonding_set_device(&bdev->dev, active ? &bdev->config : NULL);
+	if (ret < 0)
+		return ret;
+
+	bdev->active = active;
+	return 0;
+}
+
+static int
+bonding_enable_port(struct bonding_port *bp)
+{
+	struct bonding_device *bdev = bp->bdev;
+	struct device *dev;
+	int ret;
+
+	if (!bp->present)
+		return 0;
+
+	ret = bonding_set_active(bdev, true);
+	if (ret)
+		goto error;
+
+	/* Disable IPv6 for bonding ports */
+	if (!(bp->dev.dev->settings.flags & DEV_OPT_IPV6)) {
+		bp->dev.dev->settings.ipv6 = 0;
+		bp->dev.dev->settings.flags |= DEV_OPT_IPV6;
+	}
+
+	ret = device_claim(&bp->dev);
+	if (ret < 0)
+		goto error;
+
+	dev = bp->dev.dev;
+	if (dev->settings.auth && !dev->auth_status)
+		return -1;
+
+	if (bp->active)
+		return 0;
+
+	ret = system_bonding_set_port(&bdev->dev, bp->dev.dev, true, bp->set_primary);
+	if (ret < 0) {
+		D(DEVICE, "Bonding port %s could not be added\n", bp->dev.dev->ifname);
+		goto error;
+	}
+
+	bp->active = true;
+	device_set_present(&bdev->dev, true);
+
+	return 0;
+
+error:
+	bdev->n_failed++;
+	bp->present = false;
+	bdev->n_present--;
+	device_release(&bp->dev);
+
+	return ret;
+}
+
+static void
+bonding_port_cb(struct device_user *dep, enum device_event ev)
+{
+	struct bonding_port *bp = container_of(dep, struct bonding_port, dev);
+	struct bonding_device *bdev = bp->bdev;
+	struct device *dev = dep->dev;
+
+	switch (ev) {
+	case DEV_EVENT_ADD:
+		if (bp->present)
+			break;
+
+		bp->present = true;
+		bdev->n_present++;
+
+		if (bdev->n_present == 1)
+			device_set_present(&bdev->dev, true);
+		fallthrough;
+	case DEV_EVENT_AUTH_UP:
+		if (!bdev->dev.active)
+			break;
+
+		if (bonding_enable_port(bp))
+			break;
+
+		/*
+		 * Adding a bonding port can overwrite the bonding device mtu
+		 * in the kernel, apply the bonding settings in case the
+		 * bonding device mtu is set
+		 */
+		system_if_apply_settings(&bdev->dev, &bdev->dev.settings,
+					 DEV_OPT_MTU | DEV_OPT_MTU6);
+		break;
+	case DEV_EVENT_LINK_DOWN:
+		if (!dev->settings.auth)
+			break;
+
+		bonding_disable_port(bp, true);
+		break;
+	case DEV_EVENT_REMOVE:
+		if (dep->hotplug) {
+			vlist_delete(&bdev->ports, &bp->node);
+			return;
+		}
+
+		if (bp->present)
+			bonding_remove_port(bp);
+
+		break;
+	default:
+		return;
+	}
+}
+
+static struct bonding_port *
+bonding_create_port(struct bonding_device *bdev, const char *name,
+		    struct device *dev, bool hotplug)
+{
+	struct bonding_port *bp;
+
+	bp = calloc(1, sizeof(*bp) + strlen(name) + 1);
+	if (!bp)
+		return NULL;
+
+	bp->bdev = bdev;
+	bp->dev.cb = bonding_port_cb;
+	bp->dev.hotplug = hotplug;
+	strcpy(bp->name, name);
+	bp->dev.dev = dev;
+	vlist_add(&bdev->ports, &bp->node, bp->name);
+	/*
+	 * Need to look up the bonding port again as the above
+	 * created pointer will be freed in case the bonding port
+	 * already existed
+	 */
+	if (!hotplug)
+		return bp;
+
+	bp = vlist_find(&bdev->ports, name, bp, node);
+	if (bp)
+		bp->node.version = -1;
+
+	return bp;
+}
+
+static void
+bonding_config_init(struct device *dev)
+{
+	struct bonding_device *bdev;
+	struct blob_attr *cur;
+	int rem;
+
+	bdev = container_of(dev, struct bonding_device, dev);
+
+	bdev->n_failed = 0;
+
+	vlist_update(&bdev->ports);
+	blobmsg_for_each_attr(cur, bdev->port_list, rem) {
+		const char *name = blobmsg_get_string(cur);
+
+		dev = device_get(name, true);
+		if (!dev)
+			continue;
+
+		bonding_create_port(bdev, name, dev, false);
+	}
+	vlist_flush(&bdev->ports);
+
+	if (bdev->n_failed)
+		uloop_timeout_set(&bdev->retry, 100);
+}
+
+static void
+bonding_apply_settings(struct bonding_device *bdev, struct blob_attr **tb)
+{
+	struct bonding_config *cfg = &bdev->config;
+	struct blob_attr *cur;
+
+	/* defaults */
+	memset(cfg, 0, sizeof(*cfg));
+	cfg->resend_igmp = 1;
+	cfg->ad_actor_sys_prio = 65535;
+	cfg->lp_interval = 1;
+	cfg->num_peer_notif = 1;
+
+#define cfg_item(_type, _field, _attr)				\
+	do {							\
+		if ((cur = tb[BOND_ATTR_##_attr]) != NULL)	\
+			cfg->_field = blobmsg_get_##_type(cur);	\
+	} while (0)
+
+	if ((cur = tb[BOND_ATTR_POLICY]) != NULL) {
+		const char *policy = blobmsg_get_string(cur);
+		int i;
+
+		for (i = 0; i < ARRAY_SIZE(bonding_policy_str); i++) {
+			if (strcmp(policy, bonding_policy_str[i]) != 0)
+				continue;
+
+			cfg->policy = i;
+			break;
+		}
+	}
+
+	cfg_item(string, xmit_hash_policy, XMIT_HASH_POLICY);
+	cfg_item(bool, all_ports_active, ALL_PORTS_ACTIVE);
+	cfg_item(u32, min_links, MIN_LINKS);
+	cfg_item(string, ad_actor_system, AD_ACTOR_SYSTEM);
+	cfg_item(u32, ad_actor_sys_prio, AD_ACTOR_SYS_PRIO);
+	cfg_item(string, ad_select, AD_SELECT);
+	cfg_item(string, lacp_rate, LACP_RATE);
+	cfg_item(u32, packets_per_port, PACKETS_PER_PORT);
+	cfg_item(u32, lp_interval, LP_INTERVAL);
+	cfg_item(bool, dynamic_lb, DYNAMIC_LB);
+	cfg_item(u32, resend_igmp, RESEND_IGMP);
+	cfg_item(u32, num_peer_notif, NUM_PEER_NOTIF);
+	cfg_item(string, primary, PRIMARY);
+	cfg_item(string, primary_reselect, PRIMARY_RESELECT);
+	cfg_item(string, failover_mac, FAILOVER_MAC);
+	cfg_item(u32, monitor_interval, MON_INTERVAL);
+	cfg_item(bool, arp_all_targets, ARP_ALL_TARGETS);
+	cfg_item(string, arp_validate, ARP_VALIDATE);
+	cfg_item(bool, use_carrier, USE_CARRIER);
+	cfg_item(u32, updelay, UPDELAY);
+	cfg_item(u32, downdelay, DOWNDELAY);
+
+	if ((cur = tb[BOND_ATTR_MON_MODE]) != NULL &&
+	    !strcmp(blobmsg_get_string(cur), "arp"))
+		cfg->monitor_arp = true;
+	cfg->arp_target = tb[BOND_ATTR_ARP_TARGET];
+#undef cfg_item
+}
+
+static enum dev_change_type
+bonding_reload(struct device *dev, struct blob_attr *attr)
+{
+	struct blob_attr *tb_dev[__DEV_ATTR_MAX];
+	struct blob_attr *tb_b[__BOND_ATTR_MAX];
+	enum dev_change_type ret = DEV_CONFIG_APPLIED;
+	unsigned long diff;
+	struct bonding_device *bdev;
+
+	BUILD_BUG_ON(sizeof(diff) < __BOND_ATTR_MAX / 8);
+	BUILD_BUG_ON(sizeof(diff) < __DEV_ATTR_MAX / 8);
+
+	bdev = container_of(dev, struct bonding_device, dev);
+	attr = blob_memdup(attr);
+
+	blobmsg_parse(device_attr_list.params, __DEV_ATTR_MAX, tb_dev,
+		blob_data(attr), blob_len(attr));
+	blobmsg_parse(bonding_attrs, __BOND_ATTR_MAX, tb_b,
+		blob_data(attr), blob_len(attr));
+
+	bdev->has_macaddr = tb_dev[DEV_ATTR_MACADDR];
+	if (bdev->primary_port && !bdev->primary_port->set_primary &&
+	    tb_dev[DEV_ATTR_MACADDR])
+		bdev->primary_port = NULL;
+
+	bdev->port_list = tb_b[BOND_ATTR_PORTS];
+	device_init_settings(dev, tb_dev);
+	bonding_apply_settings(bdev, tb_b);
+
+	if (bdev->config_data) {
+		struct blob_attr *otb_dev[__DEV_ATTR_MAX];
+		struct blob_attr *otb_b[__BOND_ATTR_MAX];
+
+		blobmsg_parse(device_attr_list.params, __DEV_ATTR_MAX, otb_dev,
+			blob_data(bdev->config_data), blob_len(bdev->config_data));
+
+		diff = 0;
+		uci_blob_diff(tb_dev, otb_dev, &device_attr_list, &diff);
+		if (diff)
+		    ret = DEV_CONFIG_RESTART;
+
+		blobmsg_parse(bonding_attrs, __BOND_ATTR_MAX, otb_b,
+			blob_data(bdev->config_data), blob_len(bdev->config_data));
+
+		diff = 0;
+		uci_blob_diff(tb_b, otb_b, &bonding_attr_list, &diff);
+		if (diff & ~(1 << BOND_ATTR_PORTS))
+		    ret = DEV_CONFIG_RESTART;
+
+		bonding_config_init(dev);
+	}
+
+	free(bdev->config_data);
+	bdev->config_data = attr;
+
+	return ret;
+}
+
+static int
+bonding_hotplug_add(struct device *dev, struct device *port, struct blob_attr *vlan)
+{
+	struct bonding_device *bdev = container_of(dev, struct bonding_device, dev);
+	struct bonding_port *bp;
+
+	bp = vlist_find(&bdev->ports, port->ifname, bp, node);
+	if (!bp)
+		bonding_create_port(bdev, port->ifname, port, true);
+
+	return 0;
+}
+
+static int
+bonding_hotplug_del(struct device *dev, struct device *port, struct blob_attr *vlan)
+{
+	struct bonding_device *bdev = container_of(dev, struct bonding_device, dev);
+	struct bonding_port *bp;
+
+	bp = vlist_find(&bdev->ports, port->ifname, bp, node);
+	if (!bp)
+		return UBUS_STATUS_NOT_FOUND;
+
+	if (bp->dev.hotplug)
+		vlist_delete(&bdev->ports, &bp->node);
+
+	return 0;
+}
+
+static int
+bonding_hotplug_prepare(struct device *dev, struct device **bonding_dev)
+{
+	struct bonding_device *bdev;
+
+	if (bonding_dev)
+		*bonding_dev = dev;
+
+	bdev = container_of(dev, struct bonding_device, dev);
+	bdev->force_active = true;
+	device_set_present(&bdev->dev, true);
+
+	return 0;
+}
+
+static void
+bonding_retry_ports(struct uloop_timeout *timeout)
+{
+	struct bonding_device *bdev = container_of(timeout, struct bonding_device, retry);
+	struct bonding_port *bp;
+
+	bdev->n_failed = 0;
+	vlist_for_each_element(&bdev->ports, bp, node) {
+		if (bp->present)
+			continue;
+
+		if (!bp->dev.dev->present)
+			continue;
+
+		bp->present = true;
+		bdev->n_present++;
+		bonding_enable_port(bp);
+	}
+}
+
+
+static void
+bonding_free_port(struct bonding_port *bp)
+{
+	struct device *dev = bp->dev.dev;
+
+	bonding_remove_port(bp);
+
+	device_lock();
+
+	device_remove_user(&bp->dev);
+
+	/*
+	 * When reloading the config and moving a device from one master to
+	 * another, the other master may have tried to claim this device
+	 * before it was removed here.
+	 * Ensure that claiming the device is retried by toggling its present
+	 * state
+	 */
+	if (dev->present) {
+		device_set_present(dev, false);
+		device_set_present(dev, true);
+	}
+
+	device_unlock();
+
+	free(bp);
+}
+
+static void
+bonding_port_update(struct vlist_tree *tree, struct vlist_node *node_new,
+		     struct vlist_node *node_old)
+{
+	struct bonding_port *bp;
+	struct device *dev;
+
+	if (node_new) {
+		bp = container_of(node_new, struct bonding_port, node);
+
+		if (node_old) {
+			free(bp);
+			return;
+		}
+
+		dev = bp->dev.dev;
+		bp->dev.dev = NULL;
+		device_add_user(&bp->dev, dev);
+	}
+
+
+	if (node_old) {
+		bp = container_of(node_old, struct bonding_port, node);
+		bonding_free_port(bp);
+	}
+}
+
+static int
+bonding_set_down(struct bonding_device *bdev)
+{
+	struct bonding_port *bp;
+
+	bdev->set_state(&bdev->dev, false);
+
+	vlist_for_each_element(&bdev->ports, bp, node)
+		bonding_disable_port(bp, false);
+
+	bonding_set_active(bdev, false);
+
+	return 0;
+}
+
+static int
+bonding_set_up(struct bonding_device *bdev)
+{
+	struct bonding_port *bp;
+	int ret;
+
+	if (!bdev->n_present) {
+		if (!bdev->force_active)
+			return -ENOENT;
+
+		ret = bonding_set_active(bdev, true);
+		if (ret)
+			return ret;
+	}
+
+	bdev->n_failed = 0;
+	vlist_for_each_element(&bdev->ports, bp, node)
+		bonding_enable_port(bp);
+	if (bdev->n_failed)
+		uloop_timeout_set(&bdev->retry, 100);
+
+	if (!bdev->force_active && !bdev->n_present) {
+		/* initialization of all port interfaces failed */
+		bonding_set_active(bdev, false);
+		device_set_present(&bdev->dev, false);
+		return -ENOENT;
+	}
+
+	bonding_reset_primary(bdev);
+	ret = bdev->set_state(&bdev->dev, true);
+	if (ret < 0)
+		bonding_set_down(bdev);
+
+	return ret;
+}
+
+static int
+bonding_set_state(struct device *dev, bool up)
+{
+	struct bonding_device *bdev;
+
+	bdev = container_of(dev, struct bonding_device, dev);
+
+	if (up)
+		return bonding_set_up(bdev);
+	else
+		return bonding_set_down(bdev);
+}
+
+static struct device *
+bonding_create(const char *name, struct device_type *devtype,
+	struct blob_attr *attr)
+{
+	static const struct device_hotplug_ops bonding_ops = {
+		.prepare = bonding_hotplug_prepare,
+		.add = bonding_hotplug_add,
+		.del = bonding_hotplug_del
+	};
+	struct bonding_device *bdev;
+	struct device *dev = NULL;
+
+	bdev = calloc(1, sizeof(*bdev));
+	if (!bdev)
+		return NULL;
+
+	dev = &bdev->dev;
+
+	if (device_init(dev, devtype, name) < 0) {
+		device_cleanup(dev);
+		free(bdev);
+		return NULL;
+	}
+
+	dev->config_pending = true;
+	bdev->retry.cb = bonding_retry_ports;
+
+	bdev->set_state = dev->set_state;
+	dev->set_state = bonding_set_state;
+
+	dev->hotplug_ops = &bonding_ops;
+
+	vlist_init(&bdev->ports, avl_strcmp, bonding_port_update);
+	bdev->ports.keep_old = true;
+
+	bonding_reload(dev, attr);
+
+	return dev;
+}
+
+static void
+bonding_free(struct device *dev)
+{
+	struct bonding_device *bdev;
+
+	bdev = container_of(dev, struct bonding_device, dev);
+	vlist_flush_all(&bdev->ports);
+	free(bdev->config_data);
+	free(bdev);
+}
+
+static struct device_type bonding_device_type = {
+	.name = "bonding",
+	.config_params = &bonding_attr_list,
+
+	.bridge_capability = true,
+
+	.create = bonding_create,
+	.config_init = bonding_config_init,
+	.reload = bonding_reload,
+	.free = bonding_free,
+};
+
+static void __init bonding_device_type_init(void)
+{
+	device_type_add(&bonding_device_type);
+}

+ 7 - 0
config/network

@@ -11,11 +11,18 @@ config device
 	option type	bridge
 	list ifname	eth0.1
 	list ifname	eth0.2
+	list ifname	bond0
 	option mtu	1500
 
 config device
 	option name		dummy
 
+config device
+	option name	bond0
+	option type bonding
+	list ports eth0.123
+	list ports eth0.124
+
 config interface lan
 	option ifname	'br-lan'
 	option proto	static

+ 10 - 0
system-dummy.c

@@ -75,6 +75,16 @@ int system_bridge_vlan_check(struct device *dev, char *ifname)
 	return 0;
 }
 
+int system_bonding_set_device(struct device *dev, struct bonding_config *cfg)
+{
+	return 0;
+}
+
+int system_bonding_set_port(struct device *dev, struct device *port, bool add, bool primary)
+{
+	return 0;
+}
+
 int system_link_netns_move(struct device *dev, int netns_fd, const char *target_ifname)
 {
 	D(SYSTEM, "ip link set %s name %s netns %d\n", dev->ifname, target_ifname, netns_fd);

+ 97 - 0
system-linux.c

@@ -350,9 +350,21 @@ system_get_dev_sysctl(const char *prefix, const char *file, const char *ifname,
 static void
 system_set_dev_sysfs(const char *file, const char *ifname, const char *val)
 {
+	if (!val)
+		return;
+
 	write_file(dev_sysfs_path(ifname, file), val);
 }
 
+static void
+system_set_dev_sysfs_int(const char *file, const char *ifname, int val)
+{
+	char buf[16];
+
+	snprintf(buf, sizeof(buf), "%d", val);
+	system_set_dev_sysfs(file, ifname, buf);
+}
+
 static int
 system_get_dev_sysfs(const char *file, const char *ifname, char *buf, size_t buf_sz)
 {
@@ -991,6 +1003,91 @@ failure:
 	return ret;
 }
 
+int system_bonding_set_device(struct device *dev, struct bonding_config *cfg)
+{
+	const char *ifname = dev->ifname;
+	struct blob_attr *cur;
+	char op = cfg ? '+' : '-';
+	char buf[64];
+	int rem;
+
+	snprintf(dev_buf, sizeof(dev_buf), "%s/class/net/bonding_masters", sysfs_path);
+	snprintf(buf, sizeof(buf), "%c%s", op, ifname);
+	write_file(dev_buf, buf);
+
+	if (!cfg)
+		return 0;
+
+	system_set_dev_sysfs("bonding/mode", ifname, bonding_policy_str[cfg->policy]);
+
+	system_set_dev_sysfs_int("bonding/all_ports_active", ifname, cfg->all_ports_active);
+
+	if (cfg->policy == BONDING_MODE_BALANCE_XOR ||
+	    cfg->policy == BONDING_MODE_BALANCE_TLB ||
+	    cfg->policy == BONDING_MODE_8023AD)
+		system_set_dev_sysfs("bonding/xmit_hash_policy", ifname, cfg->xmit_hash_policy);
+
+	if (cfg->policy == BONDING_MODE_8023AD) {
+		system_set_dev_sysfs("bonding/ad_actor_system", ifname, cfg->ad_actor_system);
+		system_set_dev_sysfs_int("bonding/ad_actor_sys_prio", ifname, cfg->ad_actor_sys_prio);
+		system_set_dev_sysfs("bonding/ad_select", ifname, cfg->ad_select);
+		system_set_dev_sysfs("bonding/lacp_rate", ifname, cfg->lacp_rate);
+		system_set_dev_sysfs_int("bonding/min_links", ifname, cfg->min_links);
+	}
+
+	if (cfg->policy == BONDING_MODE_BALANCE_RR)
+		system_set_dev_sysfs_int("bonding/packets_per_slave", ifname, cfg->packets_per_port);
+
+	if (cfg->policy == BONDING_MODE_BALANCE_TLB ||
+	    cfg->policy == BONDING_MODE_BALANCE_ALB)
+		system_set_dev_sysfs_int("bonding/lp_interval", ifname, cfg->lp_interval);
+
+	if (cfg->policy == BONDING_MODE_BALANCE_TLB)
+		system_set_dev_sysfs_int("bonding/tlb_dynamic_lb", ifname, cfg->dynamic_lb);
+	system_set_dev_sysfs_int("bonding/resend_igmp", ifname, cfg->resend_igmp);
+	system_set_dev_sysfs_int("bonding/num_grat_arp", ifname, cfg->num_peer_notif);
+	system_set_dev_sysfs("bonding/primary_reselect", ifname, cfg->primary_reselect);
+	system_set_dev_sysfs("bonding/fail_over_mac", ifname, cfg->failover_mac);
+
+	system_set_dev_sysfs_int((cfg->monitor_arp ?
+				  "bonding/arp_interval" :
+				  "bonding/miimon"), ifname, cfg->monitor_interval);
+
+	blobmsg_for_each_attr(cur, cfg->arp_target, rem) {
+		snprintf(buf, sizeof(buf), "+%s", blobmsg_get_string(cur));
+		system_set_dev_sysfs("bonding/arp_ip_target", ifname, buf);
+	}
+
+	system_set_dev_sysfs_int("bonding/arp_all_targets", ifname, cfg->arp_all_targets);
+	if (cfg->policy < BONDING_MODE_8023AD)
+		system_set_dev_sysfs("bonding/arp_validate", ifname, cfg->arp_validate);
+	system_set_dev_sysfs_int("bonding/use_carrier", ifname, cfg->use_carrier);
+	if (!cfg->monitor_arp && cfg->monitor_interval) {
+		system_set_dev_sysfs_int("bonding/updelay", ifname, cfg->updelay);
+		system_set_dev_sysfs_int("bonding/downdelay", ifname, cfg->downdelay);
+	}
+
+	return 0;
+}
+
+int system_bonding_set_port(struct device *dev, struct device *port, bool add, bool primary)
+{
+	const char *port_name = port->ifname;
+	const char op_ch = add ? '+' : '-';
+	char buf[IFNAMSIZ + 2];
+
+	snprintf(buf, sizeof(buf), "%c%s", op_ch, port_name);
+	system_if_down(port);
+	system_set_dev_sysfs("bonding/slaves", dev->ifname, buf);
+	system_if_up(port);
+
+	if (primary)
+		system_set_dev_sysfs("bonding/primary", dev->ifname,
+				     add ? port_name : "");
+
+	return 0;
+}
+
 int system_if_resolve(struct device *dev)
 {
 	struct ifreq ifr;

+ 10 - 0
system.c

@@ -121,6 +121,16 @@ const struct uci_blob_param_list fmr_data_attr_list = {
 	.params = fmr_data_attrs,
 };
 
+const char * const bonding_policy_str[__BONDING_MODE_MAX] = {
+	[BONDING_MODE_BALANCE_RR] = "balance-rr",
+	[BONDING_MODE_ACTIVE_BACKUP] = "active-backup",
+	[BONDING_MODE_BALANCE_XOR] = "balance-xor",
+	[BONDING_MODE_BROADCAST] = "broadcast",
+	[BONDING_MODE_8023AD] = "802.3ad",
+	[BONDING_MODE_BALANCE_TLB] = "balance-tlb",
+	[BONDING_MODE_BALANCE_ALB] = "balance-alb",
+};
+
 void system_fd_set_cloexec(int fd)
 {
 #ifdef FD_CLOEXEC

+ 43 - 0
system.h

@@ -184,6 +184,44 @@ struct vlandev_config {
 	struct vlist_simple_tree egress_qos_mapping_list;  /* list of struct vlan_qos_mapping */
 };
 
+enum bonding_mode {
+	BONDING_MODE_BALANCE_RR,
+	BONDING_MODE_ACTIVE_BACKUP,
+	BONDING_MODE_BALANCE_XOR,
+	BONDING_MODE_BROADCAST,
+	BONDING_MODE_8023AD,
+	BONDING_MODE_BALANCE_TLB,
+	BONDING_MODE_BALANCE_ALB,
+	__BONDING_MODE_MAX,
+};
+
+struct bonding_config {
+	enum bonding_mode policy;
+	const char *xmit_hash_policy;
+	bool all_ports_active;
+	int min_links;
+	const char *ad_actor_system;
+	int ad_actor_sys_prio;
+	const char *ad_select;
+	const char *lacp_rate;
+	int packets_per_port;
+	int lp_interval;
+	bool dynamic_lb;
+	int resend_igmp;
+	int num_peer_notif;
+	const char *primary;
+	const char *primary_reselect;
+	const char *failover_mac;
+	bool monitor_arp;
+	int monitor_interval;
+	struct blob_attr *arp_target;
+	bool arp_all_targets;
+	const char *arp_validate;
+	bool use_carrier;
+	int updelay;
+	int downdelay;
+};
+
 static inline int system_get_addr_family(unsigned int flags)
 {
 	if ((flags & DEVADDR_FAMILY) == DEVADDR_INET6)
@@ -200,6 +238,8 @@ static inline int system_get_addr_len(unsigned int flags)
 		return sizeof(struct in6_addr);
 }
 
+extern const char * const bonding_policy_str[__BONDING_MODE_MAX];
+
 int system_init(void);
 
 int system_bridge_addbr(struct device *bridge, struct bridge_config *cfg);
@@ -210,6 +250,9 @@ int system_bridge_vlan(const char *iface, uint16_t vid, bool add, unsigned int v
 int system_bridge_vlan_check(struct device *dev, char *ifname);
 void system_bridge_set_stp_state(struct device *dev, bool val);
 
+int system_bonding_set_device(struct device *dev, struct bonding_config *cfg);
+int system_bonding_set_port(struct device *dev, struct device *port, bool add, bool primary);
+
 int system_macvlan_add(struct device *macvlan, struct device *dev, struct macvlan_config *cfg);
 int system_macvlan_del(struct device *macvlan);