mp.c (9530B)
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/netdevice.h>
#include <net/nexthop.h>
#include "lag/lag.h"
#include "lag/mp.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lib/mlx5.h"

static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
{
        return ldev->mode == MLX5_LAG_MODE_MULTIPATH;
}

static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
{
        if (!mlx5_lag_is_ready(ldev))
                return false;

        if (__mlx5_lag_is_active(ldev) && !__mlx5_lag_is_multipath(ldev))
                return false;

        return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
                                         ldev->pf[MLX5_LAG_P2].dev);
}

bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;
        bool res;

        ldev = mlx5_lag_dev(dev);
        res = ldev && __mlx5_lag_is_multipath(ldev);

        return res;
}

/**
 * mlx5_lag_set_port_affinity - set the TX port affinity of the multipath LAG
 *
 * @ldev: lag device
 * @port:
 *     0 - set normal affinity.
 *     1 - set affinity to port 1.
 *     2 - set affinity to port 2.
 **/
static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
                                       enum mlx5_lag_port_affinity port)
{
        struct lag_tracker tracker = {};

        if (!__mlx5_lag_is_multipath(ldev))
                return;

        switch (port) {
        case MLX5_LAG_NORMAL_AFFINITY:
                tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
                tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
                tracker.netdev_state[MLX5_LAG_P1].link_up = true;
                tracker.netdev_state[MLX5_LAG_P2].link_up = true;
                break;
        case MLX5_LAG_P1_AFFINITY:
                tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
                tracker.netdev_state[MLX5_LAG_P1].link_up = true;
                tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false;
                tracker.netdev_state[MLX5_LAG_P2].link_up = false;
                break;
        case MLX5_LAG_P2_AFFINITY:
                tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false;
                tracker.netdev_state[MLX5_LAG_P1].link_up = false;
                tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
                tracker.netdev_state[MLX5_LAG_P2].link_up = true;
                break;
        default:
                mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
                               "Invalid affinity port %d", port);
                return;
        }

        if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled)
                mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events,
                                         MLX5_DEV_EVENT_PORT_AFFINITY,
                                         (void *)0);

        if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled)
                mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events,
                                         MLX5_DEV_EVENT_PORT_AFFINITY,
                                         (void *)0);

        mlx5_modify_lag(ldev, &tracker);
}

static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
{
        struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);

        flush_workqueue(mp->wq);
}

static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi, u32 dst, int dst_len)
{
        mp->fib.mfi = fi;
        mp->fib.priority = fi->fib_priority;
        mp->fib.dst = dst;
        mp->fib.dst_len = dst_len;
}

struct mlx5_fib_event_work {
        struct work_struct work;
        struct mlx5_lag *ldev;
        unsigned long event;
        union {
                struct fib_entry_notifier_info fen_info;
                struct fib_nh_notifier_info fnh_info;
        };
};

static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
                                     struct fib_entry_notifier_info *fen_info)
{
        struct fib_info *fi = fen_info->fi;
        struct lag_mp *mp = &ldev->lag_mp;
        struct fib_nh *fib_nh0, *fib_nh1;
        unsigned int nhs;

        /* Handle delete event */
        if (event == FIB_EVENT_ENTRY_DEL) {
                /* Stop tracking the route */
                if (mp->fib.mfi == fi)
                        mp->fib.mfi = NULL;
                return;
        }

        /* Handle multipath entry with lower priority value */
        if (mp->fib.mfi && mp->fib.mfi != fi &&
            (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) &&
            fi->fib_priority >= mp->fib.priority)
                return;

        /* Handle add/replace event */
        nhs = fib_info_num_path(fi);
        if (nhs == 1) {
                if (__mlx5_lag_is_active(ldev)) {
                        struct fib_nh *nh = fib_info_nh(fi, 0);
                        struct net_device *nh_dev = nh->fib_nh_dev;
                        int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);

                        if (i < 0)
                                return;

                        i++;
                        mlx5_lag_set_port_affinity(ldev, i);
                        mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
                }

                return;
        }

        if (nhs != 2)
                return;

        /* Verify next hops are ports of the same HCA */
        fib_nh0 = fib_info_nh(fi, 0);
        fib_nh1 = fib_info_nh(fi, 1);
        if (!(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev &&
              fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev) &&
            !(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev &&
              fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev)) {
                mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
                               "Multipath offload requires two ports of the same HCA\n");
                return;
        }

        /* First time we see multipath route */
        if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) {
                struct lag_tracker tracker;

                tracker = ldev->tracker;
                mlx5_activate_lag(ldev, &tracker, MLX5_LAG_MODE_MULTIPATH, false);
        }

        mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
        mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
}

static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
                                       unsigned long event,
                                       struct fib_nh *fib_nh,
                                       struct fib_info *fi)
{
        struct lag_mp *mp = &ldev->lag_mp;

        /* Check the nh event is related to the tracked route */
        if (!mp->fib.mfi || mp->fib.mfi != fi)
                return;

        /* nh added/removed */
        if (event == FIB_EVENT_NH_DEL) {
                int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);

                if (i >= 0) {
                        i = (i + 1) % 2 + 1; /* peer port */
                        mlx5_lag_set_port_affinity(ldev, i);
                }
        } else if (event == FIB_EVENT_NH_ADD &&
                   fib_info_num_path(fi) == 2) {
                mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
        }
}

static void mlx5_lag_fib_update(struct work_struct *work)
{
        struct mlx5_fib_event_work *fib_work =
                container_of(work, struct mlx5_fib_event_work, work);
        struct mlx5_lag *ldev = fib_work->ldev;
        struct fib_nh *fib_nh;

        /* Protect internal structures from changes */
        rtnl_lock();
        switch (fib_work->event) {
        case FIB_EVENT_ENTRY_REPLACE:
        case FIB_EVENT_ENTRY_DEL:
                mlx5_lag_fib_route_event(ldev, fib_work->event,
                                         &fib_work->fen_info);
                fib_info_put(fib_work->fen_info.fi);
                break;
        case FIB_EVENT_NH_ADD:
        case FIB_EVENT_NH_DEL:
                fib_nh = fib_work->fnh_info.fib_nh;
                mlx5_lag_fib_nexthop_event(ldev,
                                           fib_work->event,
                                           fib_work->fnh_info.fib_nh,
                                           fib_nh->nh_parent);
                fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
                break;
        }

        rtnl_unlock();
        kfree(fib_work);
}

static struct mlx5_fib_event_work *
mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
{
        struct mlx5_fib_event_work *fib_work;

        fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
        if (WARN_ON(!fib_work))
                return NULL;

        INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
        fib_work->ldev = ldev;
        fib_work->event = event;

        return fib_work;
}

static int mlx5_lag_fib_event(struct notifier_block *nb,
                              unsigned long event,
                              void *ptr)
{
        struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
        struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
        struct fib_notifier_info *info = ptr;
        struct mlx5_fib_event_work *fib_work;
        struct fib_entry_notifier_info *fen_info;
        struct fib_nh_notifier_info *fnh_info;
        struct net_device *fib_dev;
        struct fib_info *fi;

        if (info->family != AF_INET)
                return NOTIFY_DONE;

        if (!mlx5_lag_multipath_check_prereq(ldev))
                return NOTIFY_DONE;

        switch (event) {
        case FIB_EVENT_ENTRY_REPLACE:
        case FIB_EVENT_ENTRY_DEL:
                fen_info = container_of(info, struct fib_entry_notifier_info,
                                        info);
                fi = fen_info->fi;
                if (fi->nh)
                        return NOTIFY_DONE;
                fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
                if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev &&
                    fib_dev != ldev->pf[MLX5_LAG_P2].netdev) {
                        return NOTIFY_DONE;
                }
                fib_work = mlx5_lag_init_fib_work(ldev, event);
                if (!fib_work)
                        return NOTIFY_DONE;
                fib_work->fen_info = *fen_info;
                /* Take reference on fib_info to prevent it from being
                 * freed while work is queued. Release it afterwards.
                 */
                fib_info_hold(fib_work->fen_info.fi);
                break;
        case FIB_EVENT_NH_ADD:
        case FIB_EVENT_NH_DEL:
                fnh_info = container_of(info, struct fib_nh_notifier_info,
                                        info);
                fib_work = mlx5_lag_init_fib_work(ldev, event);
                if (!fib_work)
                        return NOTIFY_DONE;
                fib_work->fnh_info = *fnh_info;
                fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
                break;
        default:
                return NOTIFY_DONE;
        }

        queue_work(mp->wq, &fib_work->work);

        return NOTIFY_DONE;
}

void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
{
        /* Clear mfi, as it might become stale when a route delete event
         * has been missed, see mlx5_lag_fib_route_event().
         */
        ldev->lag_mp.fib.mfi = NULL;
}

int mlx5_lag_mp_init(struct mlx5_lag *ldev)
{
        struct lag_mp *mp = &ldev->lag_mp;
        int err;

        /* Always clear mfi, as it might become stale when a route delete event
         * has been missed.
         */
        mp->fib.mfi = NULL;

        if (mp->fib_nb.notifier_call)
                return 0;

        mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
        if (!mp->wq)
                return -ENOMEM;

        mp->fib_nb.notifier_call = mlx5_lag_fib_event;
        err = register_fib_notifier(&init_net, &mp->fib_nb,
                                    mlx5_lag_fib_event_flush, NULL);
        if (err) {
                destroy_workqueue(mp->wq);
                mp->fib_nb.notifier_call = NULL;
        }

        return err;
}

void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
{
        struct lag_mp *mp = &ldev->lag_mp;

        if (!mp->fib_nb.notifier_call)
                return;

        unregister_fib_notifier(&init_net, &mp->fib_nb);
        destroy_workqueue(mp->wq);
        mp->fib_nb.notifier_call = NULL;
        mp->fib.mfi = NULL;
}