xfs_health.c (9641B)
1// SPDX-License-Identifier: GPL-2.0+ 2/* 3 * Copyright (C) 2019 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 */ 6#include "xfs.h" 7#include "xfs_fs.h" 8#include "xfs_shared.h" 9#include "xfs_format.h" 10#include "xfs_log_format.h" 11#include "xfs_trans_resv.h" 12#include "xfs_mount.h" 13#include "xfs_inode.h" 14#include "xfs_trace.h" 15#include "xfs_health.h" 16#include "xfs_ag.h" 17 18/* 19 * Warn about metadata corruption that we detected but haven't fixed, and 20 * make sure we're not sitting on anything that would get in the way of 21 * recovery. 22 */ 23void 24xfs_health_unmount( 25 struct xfs_mount *mp) 26{ 27 struct xfs_perag *pag; 28 xfs_agnumber_t agno; 29 unsigned int sick = 0; 30 unsigned int checked = 0; 31 bool warn = false; 32 33 if (xfs_is_shutdown(mp)) 34 return; 35 36 /* Measure AG corruption levels. */ 37 for_each_perag(mp, agno, pag) { 38 xfs_ag_measure_sickness(pag, &sick, &checked); 39 if (sick) { 40 trace_xfs_ag_unfixed_corruption(mp, agno, sick); 41 warn = true; 42 } 43 } 44 45 /* Measure realtime volume corruption levels. */ 46 xfs_rt_measure_sickness(mp, &sick, &checked); 47 if (sick) { 48 trace_xfs_rt_unfixed_corruption(mp, sick); 49 warn = true; 50 } 51 52 /* 53 * Measure fs corruption and keep the sample around for the warning. 54 * See the note below for why we exempt FS_COUNTERS. 55 */ 56 xfs_fs_measure_sickness(mp, &sick, &checked); 57 if (sick & ~XFS_SICK_FS_COUNTERS) { 58 trace_xfs_fs_unfixed_corruption(mp, sick); 59 warn = true; 60 } 61 62 if (warn) { 63 xfs_warn(mp, 64"Uncorrected metadata errors detected; please run xfs_repair."); 65 66 /* 67 * We discovered uncorrected metadata problems at some point 68 * during this filesystem mount and have advised the 69 * administrator to run repair once the unmount completes. 70 * 71 * However, we must be careful -- when FSCOUNTERS are flagged 72 * unhealthy, the unmount procedure omits writing the clean 73 * unmount record to the log so that the next mount will run 74 * recovery and recompute the summary counters. In other 75 * words, we leave a dirty log to get the counters fixed. 76 * 77 * Unfortunately, xfs_repair cannot recover dirty logs, so if 78 * there were filesystem problems, FSCOUNTERS was flagged, and 79 * the administrator takes our advice to run xfs_repair, 80 * they'll have to zap the log before repairing structures. 81 * We don't really want to encourage this, so we mark the 82 * FSCOUNTERS healthy so that a subsequent repair run won't see 83 * a dirty log. 84 */ 85 if (sick & XFS_SICK_FS_COUNTERS) 86 xfs_fs_mark_healthy(mp, XFS_SICK_FS_COUNTERS); 87 } 88} 89 90/* Mark unhealthy per-fs metadata. */ 91void 92xfs_fs_mark_sick( 93 struct xfs_mount *mp, 94 unsigned int mask) 95{ 96 ASSERT(!(mask & ~XFS_SICK_FS_PRIMARY)); 97 trace_xfs_fs_mark_sick(mp, mask); 98 99 spin_lock(&mp->m_sb_lock); 100 mp->m_fs_sick |= mask; 101 mp->m_fs_checked |= mask; 102 spin_unlock(&mp->m_sb_lock); 103} 104 105/* Mark a per-fs metadata healed. */ 106void 107xfs_fs_mark_healthy( 108 struct xfs_mount *mp, 109 unsigned int mask) 110{ 111 ASSERT(!(mask & ~XFS_SICK_FS_PRIMARY)); 112 trace_xfs_fs_mark_healthy(mp, mask); 113 114 spin_lock(&mp->m_sb_lock); 115 mp->m_fs_sick &= ~mask; 116 mp->m_fs_checked |= mask; 117 spin_unlock(&mp->m_sb_lock); 118} 119 120/* Sample which per-fs metadata are unhealthy. */ 121void 122xfs_fs_measure_sickness( 123 struct xfs_mount *mp, 124 unsigned int *sick, 125 unsigned int *checked) 126{ 127 spin_lock(&mp->m_sb_lock); 128 *sick = mp->m_fs_sick; 129 *checked = mp->m_fs_checked; 130 spin_unlock(&mp->m_sb_lock); 131} 132 133/* Mark unhealthy realtime metadata. */ 134void 135xfs_rt_mark_sick( 136 struct xfs_mount *mp, 137 unsigned int mask) 138{ 139 ASSERT(!(mask & ~XFS_SICK_RT_PRIMARY)); 140 trace_xfs_rt_mark_sick(mp, mask); 141 142 spin_lock(&mp->m_sb_lock); 143 mp->m_rt_sick |= mask; 144 mp->m_rt_checked |= mask; 145 spin_unlock(&mp->m_sb_lock); 146} 147 148/* Mark a realtime metadata healed. */ 149void 150xfs_rt_mark_healthy( 151 struct xfs_mount *mp, 152 unsigned int mask) 153{ 154 ASSERT(!(mask & ~XFS_SICK_RT_PRIMARY)); 155 trace_xfs_rt_mark_healthy(mp, mask); 156 157 spin_lock(&mp->m_sb_lock); 158 mp->m_rt_sick &= ~mask; 159 mp->m_rt_checked |= mask; 160 spin_unlock(&mp->m_sb_lock); 161} 162 163/* Sample which realtime metadata are unhealthy. */ 164void 165xfs_rt_measure_sickness( 166 struct xfs_mount *mp, 167 unsigned int *sick, 168 unsigned int *checked) 169{ 170 spin_lock(&mp->m_sb_lock); 171 *sick = mp->m_rt_sick; 172 *checked = mp->m_rt_checked; 173 spin_unlock(&mp->m_sb_lock); 174} 175 176/* Mark unhealthy per-ag metadata. */ 177void 178xfs_ag_mark_sick( 179 struct xfs_perag *pag, 180 unsigned int mask) 181{ 182 ASSERT(!(mask & ~XFS_SICK_AG_PRIMARY)); 183 trace_xfs_ag_mark_sick(pag->pag_mount, pag->pag_agno, mask); 184 185 spin_lock(&pag->pag_state_lock); 186 pag->pag_sick |= mask; 187 pag->pag_checked |= mask; 188 spin_unlock(&pag->pag_state_lock); 189} 190 191/* Mark per-ag metadata ok. */ 192void 193xfs_ag_mark_healthy( 194 struct xfs_perag *pag, 195 unsigned int mask) 196{ 197 ASSERT(!(mask & ~XFS_SICK_AG_PRIMARY)); 198 trace_xfs_ag_mark_healthy(pag->pag_mount, pag->pag_agno, mask); 199 200 spin_lock(&pag->pag_state_lock); 201 pag->pag_sick &= ~mask; 202 pag->pag_checked |= mask; 203 spin_unlock(&pag->pag_state_lock); 204} 205 206/* Sample which per-ag metadata are unhealthy. */ 207void 208xfs_ag_measure_sickness( 209 struct xfs_perag *pag, 210 unsigned int *sick, 211 unsigned int *checked) 212{ 213 spin_lock(&pag->pag_state_lock); 214 *sick = pag->pag_sick; 215 *checked = pag->pag_checked; 216 spin_unlock(&pag->pag_state_lock); 217} 218 219/* Mark the unhealthy parts of an inode. */ 220void 221xfs_inode_mark_sick( 222 struct xfs_inode *ip, 223 unsigned int mask) 224{ 225 ASSERT(!(mask & ~XFS_SICK_INO_PRIMARY)); 226 trace_xfs_inode_mark_sick(ip, mask); 227 228 spin_lock(&ip->i_flags_lock); 229 ip->i_sick |= mask; 230 ip->i_checked |= mask; 231 spin_unlock(&ip->i_flags_lock); 232 233 /* 234 * Keep this inode around so we don't lose the sickness report. Scrub 235 * grabs inodes with DONTCACHE assuming that most inode are ok, which 236 * is not the case here. 237 */ 238 spin_lock(&VFS_I(ip)->i_lock); 239 VFS_I(ip)->i_state &= ~I_DONTCACHE; 240 spin_unlock(&VFS_I(ip)->i_lock); 241} 242 243/* Mark parts of an inode healed. */ 244void 245xfs_inode_mark_healthy( 246 struct xfs_inode *ip, 247 unsigned int mask) 248{ 249 ASSERT(!(mask & ~XFS_SICK_INO_PRIMARY)); 250 trace_xfs_inode_mark_healthy(ip, mask); 251 252 spin_lock(&ip->i_flags_lock); 253 ip->i_sick &= ~mask; 254 ip->i_checked |= mask; 255 spin_unlock(&ip->i_flags_lock); 256} 257 258/* Sample which parts of an inode are unhealthy. */ 259void 260xfs_inode_measure_sickness( 261 struct xfs_inode *ip, 262 unsigned int *sick, 263 unsigned int *checked) 264{ 265 spin_lock(&ip->i_flags_lock); 266 *sick = ip->i_sick; 267 *checked = ip->i_checked; 268 spin_unlock(&ip->i_flags_lock); 269} 270 271/* Mappings between internal sick masks and ioctl sick masks. */ 272 273struct ioctl_sick_map { 274 unsigned int sick_mask; 275 unsigned int ioctl_mask; 276}; 277 278static const struct ioctl_sick_map fs_map[] = { 279 { XFS_SICK_FS_COUNTERS, XFS_FSOP_GEOM_SICK_COUNTERS}, 280 { XFS_SICK_FS_UQUOTA, XFS_FSOP_GEOM_SICK_UQUOTA }, 281 { XFS_SICK_FS_GQUOTA, XFS_FSOP_GEOM_SICK_GQUOTA }, 282 { XFS_SICK_FS_PQUOTA, XFS_FSOP_GEOM_SICK_PQUOTA }, 283 { 0, 0 }, 284}; 285 286static const struct ioctl_sick_map rt_map[] = { 287 { XFS_SICK_RT_BITMAP, XFS_FSOP_GEOM_SICK_RT_BITMAP }, 288 { XFS_SICK_RT_SUMMARY, XFS_FSOP_GEOM_SICK_RT_SUMMARY }, 289 { 0, 0 }, 290}; 291 292static inline void 293xfgeo_health_tick( 294 struct xfs_fsop_geom *geo, 295 unsigned int sick, 296 unsigned int checked, 297 const struct ioctl_sick_map *m) 298{ 299 if (checked & m->sick_mask) 300 geo->checked |= m->ioctl_mask; 301 if (sick & m->sick_mask) 302 geo->sick |= m->ioctl_mask; 303} 304 305/* Fill out fs geometry health info. */ 306void 307xfs_fsop_geom_health( 308 struct xfs_mount *mp, 309 struct xfs_fsop_geom *geo) 310{ 311 const struct ioctl_sick_map *m; 312 unsigned int sick; 313 unsigned int checked; 314 315 geo->sick = 0; 316 geo->checked = 0; 317 318 xfs_fs_measure_sickness(mp, &sick, &checked); 319 for (m = fs_map; m->sick_mask; m++) 320 xfgeo_health_tick(geo, sick, checked, m); 321 322 xfs_rt_measure_sickness(mp, &sick, &checked); 323 for (m = rt_map; m->sick_mask; m++) 324 xfgeo_health_tick(geo, sick, checked, m); 325} 326 327static const struct ioctl_sick_map ag_map[] = { 328 { XFS_SICK_AG_SB, XFS_AG_GEOM_SICK_SB }, 329 { XFS_SICK_AG_AGF, XFS_AG_GEOM_SICK_AGF }, 330 { XFS_SICK_AG_AGFL, XFS_AG_GEOM_SICK_AGFL }, 331 { XFS_SICK_AG_AGI, XFS_AG_GEOM_SICK_AGI }, 332 { XFS_SICK_AG_BNOBT, XFS_AG_GEOM_SICK_BNOBT }, 333 { XFS_SICK_AG_CNTBT, XFS_AG_GEOM_SICK_CNTBT }, 334 { XFS_SICK_AG_INOBT, XFS_AG_GEOM_SICK_INOBT }, 335 { XFS_SICK_AG_FINOBT, XFS_AG_GEOM_SICK_FINOBT }, 336 { XFS_SICK_AG_RMAPBT, XFS_AG_GEOM_SICK_RMAPBT }, 337 { XFS_SICK_AG_REFCNTBT, XFS_AG_GEOM_SICK_REFCNTBT }, 338 { 0, 0 }, 339}; 340 341/* Fill out ag geometry health info. */ 342void 343xfs_ag_geom_health( 344 struct xfs_perag *pag, 345 struct xfs_ag_geometry *ageo) 346{ 347 const struct ioctl_sick_map *m; 348 unsigned int sick; 349 unsigned int checked; 350 351 ageo->ag_sick = 0; 352 ageo->ag_checked = 0; 353 354 xfs_ag_measure_sickness(pag, &sick, &checked); 355 for (m = ag_map; m->sick_mask; m++) { 356 if (checked & m->sick_mask) 357 ageo->ag_checked |= m->ioctl_mask; 358 if (sick & m->sick_mask) 359 ageo->ag_sick |= m->ioctl_mask; 360 } 361} 362 363static const struct ioctl_sick_map ino_map[] = { 364 { XFS_SICK_INO_CORE, XFS_BS_SICK_INODE }, 365 { XFS_SICK_INO_BMBTD, XFS_BS_SICK_BMBTD }, 366 { XFS_SICK_INO_BMBTA, XFS_BS_SICK_BMBTA }, 367 { XFS_SICK_INO_BMBTC, XFS_BS_SICK_BMBTC }, 368 { XFS_SICK_INO_DIR, XFS_BS_SICK_DIR }, 369 { XFS_SICK_INO_XATTR, XFS_BS_SICK_XATTR }, 370 { XFS_SICK_INO_SYMLINK, XFS_BS_SICK_SYMLINK }, 371 { XFS_SICK_INO_PARENT, XFS_BS_SICK_PARENT }, 372 { 0, 0 }, 373}; 374 375/* Fill out bulkstat health info. */ 376void 377xfs_bulkstat_health( 378 struct xfs_inode *ip, 379 struct xfs_bulkstat *bs) 380{ 381 const struct ioctl_sick_map *m; 382 unsigned int sick; 383 unsigned int checked; 384 385 bs->bs_sick = 0; 386 bs->bs_checked = 0; 387 388 xfs_inode_measure_sickness(ip, &sick, &checked); 389 for (m = ino_map; m->sick_mask; m++) { 390 if (checked & m->sick_mask) 391 bs->bs_checked |= m->ioctl_mask; 392 if (sick & m->sick_mask) 393 bs->bs_sick |= m->ioctl_mask; 394 } 395}