io.c (37319B)
1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * This file is part of UBIFS. 4 * 5 * Copyright (C) 2006-2008 Nokia Corporation. 6 * Copyright (C) 2006, 2007 University of Szeged, Hungary 7 * 8 * Authors: Artem Bityutskiy (Битюцкий Артём) 9 * Adrian Hunter 10 * Zoltan Sogor 11 */ 12 13/* 14 * This file implements UBIFS I/O subsystem which provides various I/O-related 15 * helper functions (reading/writing/checking/validating nodes) and implements 16 * write-buffering support. Write buffers help to save space which otherwise 17 * would have been wasted for padding to the nearest minimal I/O unit boundary. 18 * Instead, data first goes to the write-buffer and is flushed when the 19 * buffer is full or when it is not used for some time (by timer). This is 20 * similar to the mechanism is used by JFFS2. 21 * 22 * UBIFS distinguishes between minimum write size (@c->min_io_size) and maximum 23 * write size (@c->max_write_size). The latter is the maximum amount of bytes 24 * the underlying flash is able to program at a time, and writing in 25 * @c->max_write_size units should presumably be faster. Obviously, 26 * @c->min_io_size <= @c->max_write_size. Write-buffers are of 27 * @c->max_write_size bytes in size for maximum performance. However, when a 28 * write-buffer is flushed, only the portion of it (aligned to @c->min_io_size 29 * boundary) which contains data is written, not the whole write-buffer, 30 * because this is more space-efficient. 31 * 32 * This optimization adds few complications to the code. Indeed, on the one 33 * hand, we want to write in optimal @c->max_write_size bytes chunks, which 34 * also means aligning writes at the @c->max_write_size bytes offsets. On the 35 * other hand, we do not want to waste space when synchronizing the write 36 * buffer, so during synchronization we writes in smaller chunks. And this makes 37 * the next write offset to be not aligned to @c->max_write_size bytes. So the 38 * have to make sure that the write-buffer offset (@wbuf->offs) becomes aligned 39 * to @c->max_write_size bytes again. We do this by temporarily shrinking 40 * write-buffer size (@wbuf->size). 41 * 42 * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by 43 * mutexes defined inside these objects. Since sometimes upper-level code 44 * has to lock the write-buffer (e.g. journal space reservation code), many 45 * functions related to write-buffers have "nolock" suffix which means that the 46 * caller has to lock the write-buffer before calling this function. 47 * 48 * UBIFS stores nodes at 64 bit-aligned addresses. If the node length is not 49 * aligned, UBIFS starts the next node from the aligned address, and the padded 50 * bytes may contain any rubbish. In other words, UBIFS does not put padding 51 * bytes in those small gaps. Common headers of nodes store real node lengths, 52 * not aligned lengths. Indexing nodes also store real lengths in branches. 53 * 54 * UBIFS uses padding when it pads to the next min. I/O unit. In this case it 55 * uses padding nodes or padding bytes, if the padding node does not fit. 56 * 57 * All UBIFS nodes are protected by CRC checksums and UBIFS checks CRC when 58 * they are read from the flash media. 59 */ 60 61#include <linux/crc32.h> 62#include <linux/slab.h> 63#include "ubifs.h" 64 65/** 66 * ubifs_ro_mode - switch UBIFS to read read-only mode. 67 * @c: UBIFS file-system description object 68 * @err: error code which is the reason of switching to R/O mode 69 */ 70void ubifs_ro_mode(struct ubifs_info *c, int err) 71{ 72 if (!c->ro_error) { 73 c->ro_error = 1; 74 c->no_chk_data_crc = 0; 75 c->vfs_sb->s_flags |= SB_RDONLY; 76 ubifs_warn(c, "switched to read-only mode, error %d", err); 77 dump_stack(); 78 } 79} 80 81/* 82 * Below are simple wrappers over UBI I/O functions which include some 83 * additional checks and UBIFS debugging stuff. See corresponding UBI function 84 * for more information. 85 */ 86 87int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs, 88 int len, int even_ebadmsg) 89{ 90 int err; 91 92 err = ubi_read(c->ubi, lnum, buf, offs, len); 93 /* 94 * In case of %-EBADMSG print the error message only if the 95 * @even_ebadmsg is true. 96 */ 97 if (err && (err != -EBADMSG || even_ebadmsg)) { 98 ubifs_err(c, "reading %d bytes from LEB %d:%d failed, error %d", 99 len, lnum, offs, err); 100 dump_stack(); 101 } 102 return err; 103} 104 105int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs, 106 int len) 107{ 108 int err; 109 110 ubifs_assert(c, !c->ro_media && !c->ro_mount); 111 if (c->ro_error) 112 return -EROFS; 113 if (!dbg_is_tst_rcvry(c)) 114 err = ubi_leb_write(c->ubi, lnum, buf, offs, len); 115 else 116 err = dbg_leb_write(c, lnum, buf, offs, len); 117 if (err) { 118 ubifs_err(c, "writing %d bytes to LEB %d:%d failed, error %d", 119 len, lnum, offs, err); 120 ubifs_ro_mode(c, err); 121 dump_stack(); 122 } 123 return err; 124} 125 126int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len) 127{ 128 int err; 129 130 ubifs_assert(c, !c->ro_media && !c->ro_mount); 131 if (c->ro_error) 132 return -EROFS; 133 if (!dbg_is_tst_rcvry(c)) 134 err = ubi_leb_change(c->ubi, lnum, buf, len); 135 else 136 err = dbg_leb_change(c, lnum, buf, len); 137 if (err) { 138 ubifs_err(c, "changing %d bytes in LEB %d failed, error %d", 139 len, lnum, err); 140 ubifs_ro_mode(c, err); 141 dump_stack(); 142 } 143 return err; 144} 145 146int ubifs_leb_unmap(struct ubifs_info *c, int lnum) 147{ 148 int err; 149 150 ubifs_assert(c, !c->ro_media && !c->ro_mount); 151 if (c->ro_error) 152 return -EROFS; 153 if (!dbg_is_tst_rcvry(c)) 154 err = ubi_leb_unmap(c->ubi, lnum); 155 else 156 err = dbg_leb_unmap(c, lnum); 157 if (err) { 158 ubifs_err(c, "unmap LEB %d failed, error %d", lnum, err); 159 ubifs_ro_mode(c, err); 160 dump_stack(); 161 } 162 return err; 163} 164 165int ubifs_leb_map(struct ubifs_info *c, int lnum) 166{ 167 int err; 168 169 ubifs_assert(c, !c->ro_media && !c->ro_mount); 170 if (c->ro_error) 171 return -EROFS; 172 if (!dbg_is_tst_rcvry(c)) 173 err = ubi_leb_map(c->ubi, lnum); 174 else 175 err = dbg_leb_map(c, lnum); 176 if (err) { 177 ubifs_err(c, "mapping LEB %d failed, error %d", lnum, err); 178 ubifs_ro_mode(c, err); 179 dump_stack(); 180 } 181 return err; 182} 183 184int ubifs_is_mapped(const struct ubifs_info *c, int lnum) 185{ 186 int err; 187 188 err = ubi_is_mapped(c->ubi, lnum); 189 if (err < 0) { 190 ubifs_err(c, "ubi_is_mapped failed for LEB %d, error %d", 191 lnum, err); 192 dump_stack(); 193 } 194 return err; 195} 196 197static void record_magic_error(struct ubifs_stats_info *stats) 198{ 199 if (stats) 200 stats->magic_errors++; 201} 202 203static void record_node_error(struct ubifs_stats_info *stats) 204{ 205 if (stats) 206 stats->node_errors++; 207} 208 209static void record_crc_error(struct ubifs_stats_info *stats) 210{ 211 if (stats) 212 stats->crc_errors++; 213} 214 215/** 216 * ubifs_check_node - check node. 217 * @c: UBIFS file-system description object 218 * @buf: node to check 219 * @len: node length 220 * @lnum: logical eraseblock number 221 * @offs: offset within the logical eraseblock 222 * @quiet: print no messages 223 * @must_chk_crc: indicates whether to always check the CRC 224 * 225 * This function checks node magic number and CRC checksum. This function also 226 * validates node length to prevent UBIFS from becoming crazy when an attacker 227 * feeds it a file-system image with incorrect nodes. For example, too large 228 * node length in the common header could cause UBIFS to read memory outside of 229 * allocated buffer when checking the CRC checksum. 230 * 231 * This function may skip data nodes CRC checking if @c->no_chk_data_crc is 232 * true, which is controlled by corresponding UBIFS mount option. However, if 233 * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is 234 * checked. Similarly, if @c->mounting or @c->remounting_rw is true (we are 235 * mounting or re-mounting to R/W mode), @c->no_chk_data_crc is ignored and CRC 236 * is checked. This is because during mounting or re-mounting from R/O mode to 237 * R/W mode we may read journal nodes (when replying the journal or doing the 238 * recovery) and the journal nodes may potentially be corrupted, so checking is 239 * required. 240 * 241 * This function returns zero in case of success and %-EUCLEAN in case of bad 242 * CRC or magic. 243 */ 244int ubifs_check_node(const struct ubifs_info *c, const void *buf, int len, 245 int lnum, int offs, int quiet, int must_chk_crc) 246{ 247 int err = -EINVAL, type, node_len; 248 uint32_t crc, node_crc, magic; 249 const struct ubifs_ch *ch = buf; 250 251 ubifs_assert(c, lnum >= 0 && lnum < c->leb_cnt && offs >= 0); 252 ubifs_assert(c, !(offs & 7) && offs < c->leb_size); 253 254 magic = le32_to_cpu(ch->magic); 255 if (magic != UBIFS_NODE_MAGIC) { 256 if (!quiet) 257 ubifs_err(c, "bad magic %#08x, expected %#08x", 258 magic, UBIFS_NODE_MAGIC); 259 record_magic_error(c->stats); 260 err = -EUCLEAN; 261 goto out; 262 } 263 264 type = ch->node_type; 265 if (type < 0 || type >= UBIFS_NODE_TYPES_CNT) { 266 if (!quiet) 267 ubifs_err(c, "bad node type %d", type); 268 record_node_error(c->stats); 269 goto out; 270 } 271 272 node_len = le32_to_cpu(ch->len); 273 if (node_len + offs > c->leb_size) 274 goto out_len; 275 276 if (c->ranges[type].max_len == 0) { 277 if (node_len != c->ranges[type].len) 278 goto out_len; 279 } else if (node_len < c->ranges[type].min_len || 280 node_len > c->ranges[type].max_len) 281 goto out_len; 282 283 if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->mounting && 284 !c->remounting_rw && c->no_chk_data_crc) 285 return 0; 286 287 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); 288 node_crc = le32_to_cpu(ch->crc); 289 if (crc != node_crc) { 290 if (!quiet) 291 ubifs_err(c, "bad CRC: calculated %#08x, read %#08x", 292 crc, node_crc); 293 record_crc_error(c->stats); 294 err = -EUCLEAN; 295 goto out; 296 } 297 298 return 0; 299 300out_len: 301 if (!quiet) 302 ubifs_err(c, "bad node length %d", node_len); 303out: 304 if (!quiet) { 305 ubifs_err(c, "bad node at LEB %d:%d", lnum, offs); 306 ubifs_dump_node(c, buf, len); 307 dump_stack(); 308 } 309 return err; 310} 311 312/** 313 * ubifs_pad - pad flash space. 314 * @c: UBIFS file-system description object 315 * @buf: buffer to put padding to 316 * @pad: how many bytes to pad 317 * 318 * The flash media obliges us to write only in chunks of %c->min_io_size and 319 * when we have to write less data we add padding node to the write-buffer and 320 * pad it to the next minimal I/O unit's boundary. Padding nodes help when the 321 * media is being scanned. If the amount of wasted space is not enough to fit a 322 * padding node which takes %UBIFS_PAD_NODE_SZ bytes, we write padding bytes 323 * pattern (%UBIFS_PADDING_BYTE). 324 * 325 * Padding nodes are also used to fill gaps when the "commit-in-gaps" method is 326 * used. 327 */ 328void ubifs_pad(const struct ubifs_info *c, void *buf, int pad) 329{ 330 uint32_t crc; 331 332 ubifs_assert(c, pad >= 0); 333 334 if (pad >= UBIFS_PAD_NODE_SZ) { 335 struct ubifs_ch *ch = buf; 336 struct ubifs_pad_node *pad_node = buf; 337 338 ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC); 339 ch->node_type = UBIFS_PAD_NODE; 340 ch->group_type = UBIFS_NO_NODE_GROUP; 341 ch->padding[0] = ch->padding[1] = 0; 342 ch->sqnum = 0; 343 ch->len = cpu_to_le32(UBIFS_PAD_NODE_SZ); 344 pad -= UBIFS_PAD_NODE_SZ; 345 pad_node->pad_len = cpu_to_le32(pad); 346 crc = crc32(UBIFS_CRC32_INIT, buf + 8, UBIFS_PAD_NODE_SZ - 8); 347 ch->crc = cpu_to_le32(crc); 348 memset(buf + UBIFS_PAD_NODE_SZ, 0, pad); 349 } else if (pad > 0) 350 /* Too little space, padding node won't fit */ 351 memset(buf, UBIFS_PADDING_BYTE, pad); 352} 353 354/** 355 * next_sqnum - get next sequence number. 356 * @c: UBIFS file-system description object 357 */ 358static unsigned long long next_sqnum(struct ubifs_info *c) 359{ 360 unsigned long long sqnum; 361 362 spin_lock(&c->cnt_lock); 363 sqnum = ++c->max_sqnum; 364 spin_unlock(&c->cnt_lock); 365 366 if (unlikely(sqnum >= SQNUM_WARN_WATERMARK)) { 367 if (sqnum >= SQNUM_WATERMARK) { 368 ubifs_err(c, "sequence number overflow %llu, end of life", 369 sqnum); 370 ubifs_ro_mode(c, -EINVAL); 371 } 372 ubifs_warn(c, "running out of sequence numbers, end of life soon"); 373 } 374 375 return sqnum; 376} 377 378void ubifs_init_node(struct ubifs_info *c, void *node, int len, int pad) 379{ 380 struct ubifs_ch *ch = node; 381 unsigned long long sqnum = next_sqnum(c); 382 383 ubifs_assert(c, len >= UBIFS_CH_SZ); 384 385 ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC); 386 ch->len = cpu_to_le32(len); 387 ch->group_type = UBIFS_NO_NODE_GROUP; 388 ch->sqnum = cpu_to_le64(sqnum); 389 ch->padding[0] = ch->padding[1] = 0; 390 391 if (pad) { 392 len = ALIGN(len, 8); 393 pad = ALIGN(len, c->min_io_size) - len; 394 ubifs_pad(c, node + len, pad); 395 } 396} 397 398void ubifs_crc_node(struct ubifs_info *c, void *node, int len) 399{ 400 struct ubifs_ch *ch = node; 401 uint32_t crc; 402 403 crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8); 404 ch->crc = cpu_to_le32(crc); 405} 406 407/** 408 * ubifs_prepare_node_hmac - prepare node to be written to flash. 409 * @c: UBIFS file-system description object 410 * @node: the node to pad 411 * @len: node length 412 * @hmac_offs: offset of the HMAC in the node 413 * @pad: if the buffer has to be padded 414 * 415 * This function prepares node at @node to be written to the media - it 416 * calculates node CRC, fills the common header, and adds proper padding up to 417 * the next minimum I/O unit if @pad is not zero. if @hmac_offs is positive then 418 * a HMAC is inserted into the node at the given offset. 419 * 420 * This function returns 0 for success or a negative error code otherwise. 421 */ 422int ubifs_prepare_node_hmac(struct ubifs_info *c, void *node, int len, 423 int hmac_offs, int pad) 424{ 425 int err; 426 427 ubifs_init_node(c, node, len, pad); 428 429 if (hmac_offs > 0) { 430 err = ubifs_node_insert_hmac(c, node, len, hmac_offs); 431 if (err) 432 return err; 433 } 434 435 ubifs_crc_node(c, node, len); 436 437 return 0; 438} 439 440/** 441 * ubifs_prepare_node - prepare node to be written to flash. 442 * @c: UBIFS file-system description object 443 * @node: the node to pad 444 * @len: node length 445 * @pad: if the buffer has to be padded 446 * 447 * This function prepares node at @node to be written to the media - it 448 * calculates node CRC, fills the common header, and adds proper padding up to 449 * the next minimum I/O unit if @pad is not zero. 450 */ 451void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad) 452{ 453 /* 454 * Deliberately ignore return value since this function can only fail 455 * when a hmac offset is given. 456 */ 457 ubifs_prepare_node_hmac(c, node, len, 0, pad); 458} 459 460/** 461 * ubifs_prep_grp_node - prepare node of a group to be written to flash. 462 * @c: UBIFS file-system description object 463 * @node: the node to pad 464 * @len: node length 465 * @last: indicates the last node of the group 466 * 467 * This function prepares node at @node to be written to the media - it 468 * calculates node CRC and fills the common header. 469 */ 470void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last) 471{ 472 uint32_t crc; 473 struct ubifs_ch *ch = node; 474 unsigned long long sqnum = next_sqnum(c); 475 476 ubifs_assert(c, len >= UBIFS_CH_SZ); 477 478 ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC); 479 ch->len = cpu_to_le32(len); 480 if (last) 481 ch->group_type = UBIFS_LAST_OF_NODE_GROUP; 482 else 483 ch->group_type = UBIFS_IN_NODE_GROUP; 484 ch->sqnum = cpu_to_le64(sqnum); 485 ch->padding[0] = ch->padding[1] = 0; 486 crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8); 487 ch->crc = cpu_to_le32(crc); 488} 489 490/** 491 * wbuf_timer_callback - write-buffer timer callback function. 492 * @timer: timer data (write-buffer descriptor) 493 * 494 * This function is called when the write-buffer timer expires. 495 */ 496static enum hrtimer_restart wbuf_timer_callback_nolock(struct hrtimer *timer) 497{ 498 struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer); 499 500 dbg_io("jhead %s", dbg_jhead(wbuf->jhead)); 501 wbuf->need_sync = 1; 502 wbuf->c->need_wbuf_sync = 1; 503 ubifs_wake_up_bgt(wbuf->c); 504 return HRTIMER_NORESTART; 505} 506 507/** 508 * new_wbuf_timer - start new write-buffer timer. 509 * @c: UBIFS file-system description object 510 * @wbuf: write-buffer descriptor 511 */ 512static void new_wbuf_timer_nolock(struct ubifs_info *c, struct ubifs_wbuf *wbuf) 513{ 514 ktime_t softlimit = ms_to_ktime(dirty_writeback_interval * 10); 515 unsigned long long delta = dirty_writeback_interval; 516 517 /* centi to milli, milli to nano, then 10% */ 518 delta *= 10ULL * NSEC_PER_MSEC / 10ULL; 519 520 ubifs_assert(c, !hrtimer_active(&wbuf->timer)); 521 ubifs_assert(c, delta <= ULONG_MAX); 522 523 if (wbuf->no_timer) 524 return; 525 dbg_io("set timer for jhead %s, %llu-%llu millisecs", 526 dbg_jhead(wbuf->jhead), 527 div_u64(ktime_to_ns(softlimit), USEC_PER_SEC), 528 div_u64(ktime_to_ns(softlimit) + delta, USEC_PER_SEC)); 529 hrtimer_start_range_ns(&wbuf->timer, softlimit, delta, 530 HRTIMER_MODE_REL); 531} 532 533/** 534 * cancel_wbuf_timer - cancel write-buffer timer. 535 * @wbuf: write-buffer descriptor 536 */ 537static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) 538{ 539 if (wbuf->no_timer) 540 return; 541 wbuf->need_sync = 0; 542 hrtimer_cancel(&wbuf->timer); 543} 544 545/** 546 * ubifs_wbuf_sync_nolock - synchronize write-buffer. 547 * @wbuf: write-buffer to synchronize 548 * 549 * This function synchronizes write-buffer @buf and returns zero in case of 550 * success or a negative error code in case of failure. 551 * 552 * Note, although write-buffers are of @c->max_write_size, this function does 553 * not necessarily writes all @c->max_write_size bytes to the flash. Instead, 554 * if the write-buffer is only partially filled with data, only the used part 555 * of the write-buffer (aligned on @c->min_io_size boundary) is synchronized. 556 * This way we waste less space. 557 */ 558int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) 559{ 560 struct ubifs_info *c = wbuf->c; 561 int err, dirt, sync_len; 562 563 cancel_wbuf_timer_nolock(wbuf); 564 if (!wbuf->used || wbuf->lnum == -1) 565 /* Write-buffer is empty or not seeked */ 566 return 0; 567 568 dbg_io("LEB %d:%d, %d bytes, jhead %s", 569 wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead)); 570 ubifs_assert(c, !(wbuf->avail & 7)); 571 ubifs_assert(c, wbuf->offs + wbuf->size <= c->leb_size); 572 ubifs_assert(c, wbuf->size >= c->min_io_size); 573 ubifs_assert(c, wbuf->size <= c->max_write_size); 574 ubifs_assert(c, wbuf->size % c->min_io_size == 0); 575 ubifs_assert(c, !c->ro_media && !c->ro_mount); 576 if (c->leb_size - wbuf->offs >= c->max_write_size) 577 ubifs_assert(c, !((wbuf->offs + wbuf->size) % c->max_write_size)); 578 579 if (c->ro_error) 580 return -EROFS; 581 582 /* 583 * Do not write whole write buffer but write only the minimum necessary 584 * amount of min. I/O units. 585 */ 586 sync_len = ALIGN(wbuf->used, c->min_io_size); 587 dirt = sync_len - wbuf->used; 588 if (dirt) 589 ubifs_pad(c, wbuf->buf + wbuf->used, dirt); 590 err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, sync_len); 591 if (err) 592 return err; 593 594 spin_lock(&wbuf->lock); 595 wbuf->offs += sync_len; 596 /* 597 * Now @wbuf->offs is not necessarily aligned to @c->max_write_size. 598 * But our goal is to optimize writes and make sure we write in 599 * @c->max_write_size chunks and to @c->max_write_size-aligned offset. 600 * Thus, if @wbuf->offs is not aligned to @c->max_write_size now, make 601 * sure that @wbuf->offs + @wbuf->size is aligned to 602 * @c->max_write_size. This way we make sure that after next 603 * write-buffer flush we are again at the optimal offset (aligned to 604 * @c->max_write_size). 605 */ 606 if (c->leb_size - wbuf->offs < c->max_write_size) 607 wbuf->size = c->leb_size - wbuf->offs; 608 else if (wbuf->offs & (c->max_write_size - 1)) 609 wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs; 610 else 611 wbuf->size = c->max_write_size; 612 wbuf->avail = wbuf->size; 613 wbuf->used = 0; 614 wbuf->next_ino = 0; 615 spin_unlock(&wbuf->lock); 616 617 if (wbuf->sync_callback) 618 err = wbuf->sync_callback(c, wbuf->lnum, 619 c->leb_size - wbuf->offs, dirt); 620 return err; 621} 622 623/** 624 * ubifs_wbuf_seek_nolock - seek write-buffer. 625 * @wbuf: write-buffer 626 * @lnum: logical eraseblock number to seek to 627 * @offs: logical eraseblock offset to seek to 628 * 629 * This function targets the write-buffer to logical eraseblock @lnum:@offs. 630 * The write-buffer has to be empty. Returns zero in case of success and a 631 * negative error code in case of failure. 632 */ 633int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs) 634{ 635 const struct ubifs_info *c = wbuf->c; 636 637 dbg_io("LEB %d:%d, jhead %s", lnum, offs, dbg_jhead(wbuf->jhead)); 638 ubifs_assert(c, lnum >= 0 && lnum < c->leb_cnt); 639 ubifs_assert(c, offs >= 0 && offs <= c->leb_size); 640 ubifs_assert(c, offs % c->min_io_size == 0 && !(offs & 7)); 641 ubifs_assert(c, lnum != wbuf->lnum); 642 ubifs_assert(c, wbuf->used == 0); 643 644 spin_lock(&wbuf->lock); 645 wbuf->lnum = lnum; 646 wbuf->offs = offs; 647 if (c->leb_size - wbuf->offs < c->max_write_size) 648 wbuf->size = c->leb_size - wbuf->offs; 649 else if (wbuf->offs & (c->max_write_size - 1)) 650 wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs; 651 else 652 wbuf->size = c->max_write_size; 653 wbuf->avail = wbuf->size; 654 wbuf->used = 0; 655 spin_unlock(&wbuf->lock); 656 657 return 0; 658} 659 660/** 661 * ubifs_bg_wbufs_sync - synchronize write-buffers. 662 * @c: UBIFS file-system description object 663 * 664 * This function is called by background thread to synchronize write-buffers. 665 * Returns zero in case of success and a negative error code in case of 666 * failure. 667 */ 668int ubifs_bg_wbufs_sync(struct ubifs_info *c) 669{ 670 int err, i; 671 672 ubifs_assert(c, !c->ro_media && !c->ro_mount); 673 if (!c->need_wbuf_sync) 674 return 0; 675 c->need_wbuf_sync = 0; 676 677 if (c->ro_error) { 678 err = -EROFS; 679 goto out_timers; 680 } 681 682 dbg_io("synchronize"); 683 for (i = 0; i < c->jhead_cnt; i++) { 684 struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf; 685 686 cond_resched(); 687 688 /* 689 * If the mutex is locked then wbuf is being changed, so 690 * synchronization is not necessary. 691 */ 692 if (mutex_is_locked(&wbuf->io_mutex)) 693 continue; 694 695 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); 696 if (!wbuf->need_sync) { 697 mutex_unlock(&wbuf->io_mutex); 698 continue; 699 } 700 701 err = ubifs_wbuf_sync_nolock(wbuf); 702 mutex_unlock(&wbuf->io_mutex); 703 if (err) { 704 ubifs_err(c, "cannot sync write-buffer, error %d", err); 705 ubifs_ro_mode(c, err); 706 goto out_timers; 707 } 708 } 709 710 return 0; 711 712out_timers: 713 /* Cancel all timers to prevent repeated errors */ 714 for (i = 0; i < c->jhead_cnt; i++) { 715 struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf; 716 717 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); 718 cancel_wbuf_timer_nolock(wbuf); 719 mutex_unlock(&wbuf->io_mutex); 720 } 721 return err; 722} 723 724/** 725 * ubifs_wbuf_write_nolock - write data to flash via write-buffer. 726 * @wbuf: write-buffer 727 * @buf: node to write 728 * @len: node length 729 * 730 * This function writes data to flash via write-buffer @wbuf. This means that 731 * the last piece of the node won't reach the flash media immediately if it 732 * does not take whole max. write unit (@c->max_write_size). Instead, the node 733 * will sit in RAM until the write-buffer is synchronized (e.g., by timer, or 734 * because more data are appended to the write-buffer). 735 * 736 * This function returns zero in case of success and a negative error code in 737 * case of failure. If the node cannot be written because there is no more 738 * space in this logical eraseblock, %-ENOSPC is returned. 739 */ 740int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) 741{ 742 struct ubifs_info *c = wbuf->c; 743 int err, n, written = 0, aligned_len = ALIGN(len, 8); 744 745 dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len, 746 dbg_ntype(((struct ubifs_ch *)buf)->node_type), 747 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs + wbuf->used); 748 ubifs_assert(c, len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt); 749 ubifs_assert(c, wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0); 750 ubifs_assert(c, !(wbuf->offs & 7) && wbuf->offs <= c->leb_size); 751 ubifs_assert(c, wbuf->avail > 0 && wbuf->avail <= wbuf->size); 752 ubifs_assert(c, wbuf->size >= c->min_io_size); 753 ubifs_assert(c, wbuf->size <= c->max_write_size); 754 ubifs_assert(c, wbuf->size % c->min_io_size == 0); 755 ubifs_assert(c, mutex_is_locked(&wbuf->io_mutex)); 756 ubifs_assert(c, !c->ro_media && !c->ro_mount); 757 ubifs_assert(c, !c->space_fixup); 758 if (c->leb_size - wbuf->offs >= c->max_write_size) 759 ubifs_assert(c, !((wbuf->offs + wbuf->size) % c->max_write_size)); 760 761 if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { 762 err = -ENOSPC; 763 goto out; 764 } 765 766 cancel_wbuf_timer_nolock(wbuf); 767 768 if (c->ro_error) 769 return -EROFS; 770 771 if (aligned_len <= wbuf->avail) { 772 /* 773 * The node is not very large and fits entirely within 774 * write-buffer. 775 */ 776 memcpy(wbuf->buf + wbuf->used, buf, len); 777 if (aligned_len > len) { 778 ubifs_assert(c, aligned_len - len < 8); 779 ubifs_pad(c, wbuf->buf + wbuf->used + len, aligned_len - len); 780 } 781 782 if (aligned_len == wbuf->avail) { 783 dbg_io("flush jhead %s wbuf to LEB %d:%d", 784 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); 785 err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, 786 wbuf->offs, wbuf->size); 787 if (err) 788 goto out; 789 790 spin_lock(&wbuf->lock); 791 wbuf->offs += wbuf->size; 792 if (c->leb_size - wbuf->offs >= c->max_write_size) 793 wbuf->size = c->max_write_size; 794 else 795 wbuf->size = c->leb_size - wbuf->offs; 796 wbuf->avail = wbuf->size; 797 wbuf->used = 0; 798 wbuf->next_ino = 0; 799 spin_unlock(&wbuf->lock); 800 } else { 801 spin_lock(&wbuf->lock); 802 wbuf->avail -= aligned_len; 803 wbuf->used += aligned_len; 804 spin_unlock(&wbuf->lock); 805 } 806 807 goto exit; 808 } 809 810 if (wbuf->used) { 811 /* 812 * The node is large enough and does not fit entirely within 813 * current available space. We have to fill and flush 814 * write-buffer and switch to the next max. write unit. 815 */ 816 dbg_io("flush jhead %s wbuf to LEB %d:%d", 817 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); 818 memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); 819 err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, 820 wbuf->size); 821 if (err) 822 goto out; 823 824 wbuf->offs += wbuf->size; 825 len -= wbuf->avail; 826 aligned_len -= wbuf->avail; 827 written += wbuf->avail; 828 } else if (wbuf->offs & (c->max_write_size - 1)) { 829 /* 830 * The write-buffer offset is not aligned to 831 * @c->max_write_size and @wbuf->size is less than 832 * @c->max_write_size. Write @wbuf->size bytes to make sure the 833 * following writes are done in optimal @c->max_write_size 834 * chunks. 835 */ 836 dbg_io("write %d bytes to LEB %d:%d", 837 wbuf->size, wbuf->lnum, wbuf->offs); 838 err = ubifs_leb_write(c, wbuf->lnum, buf, wbuf->offs, 839 wbuf->size); 840 if (err) 841 goto out; 842 843 wbuf->offs += wbuf->size; 844 len -= wbuf->size; 845 aligned_len -= wbuf->size; 846 written += wbuf->size; 847 } 848 849 /* 850 * The remaining data may take more whole max. write units, so write the 851 * remains multiple to max. write unit size directly to the flash media. 852 * We align node length to 8-byte boundary because we anyway flash wbuf 853 * if the remaining space is less than 8 bytes. 854 */ 855 n = aligned_len >> c->max_write_shift; 856 if (n) { 857 int m = n - 1; 858 859 dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, 860 wbuf->offs); 861 862 if (m) { 863 /* '(n-1)<<c->max_write_shift < len' is always true. */ 864 m <<= c->max_write_shift; 865 err = ubifs_leb_write(c, wbuf->lnum, buf + written, 866 wbuf->offs, m); 867 if (err) 868 goto out; 869 wbuf->offs += m; 870 aligned_len -= m; 871 len -= m; 872 written += m; 873 } 874 875 /* 876 * The non-written len of buf may be less than 'n' because 877 * parameter 'len' is not 8 bytes aligned, so here we read 878 * min(len, n) bytes from buf. 879 */ 880 n = 1 << c->max_write_shift; 881 memcpy(wbuf->buf, buf + written, min(len, n)); 882 if (n > len) { 883 ubifs_assert(c, n - len < 8); 884 ubifs_pad(c, wbuf->buf + len, n - len); 885 } 886 887 err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, n); 888 if (err) 889 goto out; 890 wbuf->offs += n; 891 aligned_len -= n; 892 len -= min(len, n); 893 written += n; 894 } 895 896 spin_lock(&wbuf->lock); 897 if (aligned_len) { 898 /* 899 * And now we have what's left and what does not take whole 900 * max. write unit, so write it to the write-buffer and we are 901 * done. 902 */ 903 memcpy(wbuf->buf, buf + written, len); 904 if (aligned_len > len) { 905 ubifs_assert(c, aligned_len - len < 8); 906 ubifs_pad(c, wbuf->buf + len, aligned_len - len); 907 } 908 } 909 910 if (c->leb_size - wbuf->offs >= c->max_write_size) 911 wbuf->size = c->max_write_size; 912 else 913 wbuf->size = c->leb_size - wbuf->offs; 914 wbuf->avail = wbuf->size - aligned_len; 915 wbuf->used = aligned_len; 916 wbuf->next_ino = 0; 917 spin_unlock(&wbuf->lock); 918 919exit: 920 if (wbuf->sync_callback) { 921 int free = c->leb_size - wbuf->offs - wbuf->used; 922 923 err = wbuf->sync_callback(c, wbuf->lnum, free, 0); 924 if (err) 925 goto out; 926 } 927 928 if (wbuf->used) 929 new_wbuf_timer_nolock(c, wbuf); 930 931 return 0; 932 933out: 934 ubifs_err(c, "cannot write %d bytes to LEB %d:%d, error %d", 935 len, wbuf->lnum, wbuf->offs, err); 936 ubifs_dump_node(c, buf, written + len); 937 dump_stack(); 938 ubifs_dump_leb(c, wbuf->lnum); 939 return err; 940} 941 942/** 943 * ubifs_write_node_hmac - write node to the media. 944 * @c: UBIFS file-system description object 945 * @buf: the node to write 946 * @len: node length 947 * @lnum: logical eraseblock number 948 * @offs: offset within the logical eraseblock 949 * @hmac_offs: offset of the HMAC within the node 950 * 951 * This function automatically fills node magic number, assigns sequence 952 * number, and calculates node CRC checksum. The length of the @buf buffer has 953 * to be aligned to the minimal I/O unit size. This function automatically 954 * appends padding node and padding bytes if needed. Returns zero in case of 955 * success and a negative error code in case of failure. 956 */ 957int ubifs_write_node_hmac(struct ubifs_info *c, void *buf, int len, int lnum, 958 int offs, int hmac_offs) 959{ 960 int err, buf_len = ALIGN(len, c->min_io_size); 961 962 dbg_io("LEB %d:%d, %s, length %d (aligned %d)", 963 lnum, offs, dbg_ntype(((struct ubifs_ch *)buf)->node_type), len, 964 buf_len); 965 ubifs_assert(c, lnum >= 0 && lnum < c->leb_cnt && offs >= 0); 966 ubifs_assert(c, offs % c->min_io_size == 0 && offs < c->leb_size); 967 ubifs_assert(c, !c->ro_media && !c->ro_mount); 968 ubifs_assert(c, !c->space_fixup); 969 970 if (c->ro_error) 971 return -EROFS; 972 973 err = ubifs_prepare_node_hmac(c, buf, len, hmac_offs, 1); 974 if (err) 975 return err; 976 977 err = ubifs_leb_write(c, lnum, buf, offs, buf_len); 978 if (err) 979 ubifs_dump_node(c, buf, len); 980 981 return err; 982} 983 984/** 985 * ubifs_write_node - write node to the media. 986 * @c: UBIFS file-system description object 987 * @buf: the node to write 988 * @len: node length 989 * @lnum: logical eraseblock number 990 * @offs: offset within the logical eraseblock 991 * 992 * This function automatically fills node magic number, assigns sequence 993 * number, and calculates node CRC checksum. The length of the @buf buffer has 994 * to be aligned to the minimal I/O unit size. This function automatically 995 * appends padding node and padding bytes if needed. Returns zero in case of 996 * success and a negative error code in case of failure. 997 */ 998int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum, 999 int offs) 1000{ 1001 return ubifs_write_node_hmac(c, buf, len, lnum, offs, -1); 1002} 1003 1004/** 1005 * ubifs_read_node_wbuf - read node from the media or write-buffer. 1006 * @wbuf: wbuf to check for un-written data 1007 * @buf: buffer to read to 1008 * @type: node type 1009 * @len: node length 1010 * @lnum: logical eraseblock number 1011 * @offs: offset within the logical eraseblock 1012 * 1013 * This function reads a node of known type and length, checks it and stores 1014 * in @buf. If the node partially or fully sits in the write-buffer, this 1015 * function takes data from the buffer, otherwise it reads the flash media. 1016 * Returns zero in case of success, %-EUCLEAN if CRC mismatched and a negative 1017 * error code in case of failure. 1018 */ 1019int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, 1020 int lnum, int offs) 1021{ 1022 const struct ubifs_info *c = wbuf->c; 1023 int err, rlen, overlap; 1024 struct ubifs_ch *ch = buf; 1025 1026 dbg_io("LEB %d:%d, %s, length %d, jhead %s", lnum, offs, 1027 dbg_ntype(type), len, dbg_jhead(wbuf->jhead)); 1028 ubifs_assert(c, wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0); 1029 ubifs_assert(c, !(offs & 7) && offs < c->leb_size); 1030 ubifs_assert(c, type >= 0 && type < UBIFS_NODE_TYPES_CNT); 1031 1032 spin_lock(&wbuf->lock); 1033 overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs); 1034 if (!overlap) { 1035 /* We may safely unlock the write-buffer and read the data */ 1036 spin_unlock(&wbuf->lock); 1037 return ubifs_read_node(c, buf, type, len, lnum, offs); 1038 } 1039 1040 /* Don't read under wbuf */ 1041 rlen = wbuf->offs - offs; 1042 if (rlen < 0) 1043 rlen = 0; 1044 1045 /* Copy the rest from the write-buffer */ 1046 memcpy(buf + rlen, wbuf->buf + offs + rlen - wbuf->offs, len - rlen); 1047 spin_unlock(&wbuf->lock); 1048 1049 if (rlen > 0) { 1050 /* Read everything that goes before write-buffer */ 1051 err = ubifs_leb_read(c, lnum, buf, offs, rlen, 0); 1052 if (err && err != -EBADMSG) 1053 return err; 1054 } 1055 1056 if (type != ch->node_type) { 1057 ubifs_err(c, "bad node type (%d but expected %d)", 1058 ch->node_type, type); 1059 goto out; 1060 } 1061 1062 err = ubifs_check_node(c, buf, len, lnum, offs, 0, 0); 1063 if (err) { 1064 ubifs_err(c, "expected node type %d", type); 1065 return err; 1066 } 1067 1068 rlen = le32_to_cpu(ch->len); 1069 if (rlen != len) { 1070 ubifs_err(c, "bad node length %d, expected %d", rlen, len); 1071 goto out; 1072 } 1073 1074 return 0; 1075 1076out: 1077 ubifs_err(c, "bad node at LEB %d:%d", lnum, offs); 1078 ubifs_dump_node(c, buf, len); 1079 dump_stack(); 1080 return -EINVAL; 1081} 1082 1083/** 1084 * ubifs_read_node - read node. 1085 * @c: UBIFS file-system description object 1086 * @buf: buffer to read to 1087 * @type: node type 1088 * @len: node length (not aligned) 1089 * @lnum: logical eraseblock number 1090 * @offs: offset within the logical eraseblock 1091 * 1092 * This function reads a node of known type and length, checks it and 1093 * stores in @buf. Returns zero in case of success, %-EUCLEAN if CRC mismatched 1094 * and a negative error code in case of failure. 1095 */ 1096int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, 1097 int lnum, int offs) 1098{ 1099 int err, l; 1100 struct ubifs_ch *ch = buf; 1101 1102 dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); 1103 ubifs_assert(c, lnum >= 0 && lnum < c->leb_cnt && offs >= 0); 1104 ubifs_assert(c, len >= UBIFS_CH_SZ && offs + len <= c->leb_size); 1105 ubifs_assert(c, !(offs & 7) && offs < c->leb_size); 1106 ubifs_assert(c, type >= 0 && type < UBIFS_NODE_TYPES_CNT); 1107 1108 err = ubifs_leb_read(c, lnum, buf, offs, len, 0); 1109 if (err && err != -EBADMSG) 1110 return err; 1111 1112 if (type != ch->node_type) { 1113 ubifs_errc(c, "bad node type (%d but expected %d)", 1114 ch->node_type, type); 1115 goto out; 1116 } 1117 1118 err = ubifs_check_node(c, buf, len, lnum, offs, 0, 0); 1119 if (err) { 1120 ubifs_errc(c, "expected node type %d", type); 1121 return err; 1122 } 1123 1124 l = le32_to_cpu(ch->len); 1125 if (l != len) { 1126 ubifs_errc(c, "bad node length %d, expected %d", l, len); 1127 goto out; 1128 } 1129 1130 return 0; 1131 1132out: 1133 ubifs_errc(c, "bad node at LEB %d:%d, LEB mapping status %d", lnum, 1134 offs, ubi_is_mapped(c->ubi, lnum)); 1135 if (!c->probing) { 1136 ubifs_dump_node(c, buf, len); 1137 dump_stack(); 1138 } 1139 return -EINVAL; 1140} 1141 1142/** 1143 * ubifs_wbuf_init - initialize write-buffer. 1144 * @c: UBIFS file-system description object 1145 * @wbuf: write-buffer to initialize 1146 * 1147 * This function initializes write-buffer. Returns zero in case of success 1148 * %-ENOMEM in case of failure. 1149 */ 1150int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) 1151{ 1152 size_t size; 1153 1154 wbuf->buf = kmalloc(c->max_write_size, GFP_KERNEL); 1155 if (!wbuf->buf) 1156 return -ENOMEM; 1157 1158 size = (c->max_write_size / UBIFS_CH_SZ + 1) * sizeof(ino_t); 1159 wbuf->inodes = kmalloc(size, GFP_KERNEL); 1160 if (!wbuf->inodes) { 1161 kfree(wbuf->buf); 1162 wbuf->buf = NULL; 1163 return -ENOMEM; 1164 } 1165 1166 wbuf->used = 0; 1167 wbuf->lnum = wbuf->offs = -1; 1168 /* 1169 * If the LEB starts at the max. write size aligned address, then 1170 * write-buffer size has to be set to @c->max_write_size. Otherwise, 1171 * set it to something smaller so that it ends at the closest max. 1172 * write size boundary. 1173 */ 1174 size = c->max_write_size - (c->leb_start % c->max_write_size); 1175 wbuf->avail = wbuf->size = size; 1176 wbuf->sync_callback = NULL; 1177 mutex_init(&wbuf->io_mutex); 1178 spin_lock_init(&wbuf->lock); 1179 wbuf->c = c; 1180 wbuf->next_ino = 0; 1181 1182 hrtimer_init(&wbuf->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 1183 wbuf->timer.function = wbuf_timer_callback_nolock; 1184 return 0; 1185} 1186 1187/** 1188 * ubifs_wbuf_add_ino_nolock - add an inode number into the wbuf inode array. 1189 * @wbuf: the write-buffer where to add 1190 * @inum: the inode number 1191 * 1192 * This function adds an inode number to the inode array of the write-buffer. 1193 */ 1194void ubifs_wbuf_add_ino_nolock(struct ubifs_wbuf *wbuf, ino_t inum) 1195{ 1196 if (!wbuf->buf) 1197 /* NOR flash or something similar */ 1198 return; 1199 1200 spin_lock(&wbuf->lock); 1201 if (wbuf->used) 1202 wbuf->inodes[wbuf->next_ino++] = inum; 1203 spin_unlock(&wbuf->lock); 1204} 1205 1206/** 1207 * wbuf_has_ino - returns if the wbuf contains data from the inode. 1208 * @wbuf: the write-buffer 1209 * @inum: the inode number 1210 * 1211 * This function returns with %1 if the write-buffer contains some data from the 1212 * given inode otherwise it returns with %0. 1213 */ 1214static int wbuf_has_ino(struct ubifs_wbuf *wbuf, ino_t inum) 1215{ 1216 int i, ret = 0; 1217 1218 spin_lock(&wbuf->lock); 1219 for (i = 0; i < wbuf->next_ino; i++) 1220 if (inum == wbuf->inodes[i]) { 1221 ret = 1; 1222 break; 1223 } 1224 spin_unlock(&wbuf->lock); 1225 1226 return ret; 1227} 1228 1229/** 1230 * ubifs_sync_wbufs_by_inode - synchronize write-buffers for an inode. 1231 * @c: UBIFS file-system description object 1232 * @inode: inode to synchronize 1233 * 1234 * This function synchronizes write-buffers which contain nodes belonging to 1235 * @inode. Returns zero in case of success and a negative error code in case of 1236 * failure. 1237 */ 1238int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode) 1239{ 1240 int i, err = 0; 1241 1242 for (i = 0; i < c->jhead_cnt; i++) { 1243 struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf; 1244 1245 if (i == GCHD) 1246 /* 1247 * GC head is special, do not look at it. Even if the 1248 * head contains something related to this inode, it is 1249 * a _copy_ of corresponding on-flash node which sits 1250 * somewhere else. 1251 */ 1252 continue; 1253 1254 if (!wbuf_has_ino(wbuf, inode->i_ino)) 1255 continue; 1256 1257 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); 1258 if (wbuf_has_ino(wbuf, inode->i_ino)) 1259 err = ubifs_wbuf_sync_nolock(wbuf); 1260 mutex_unlock(&wbuf->io_mutex); 1261 1262 if (err) { 1263 ubifs_ro_mode(c, err); 1264 return err; 1265 } 1266 } 1267 return 0; 1268}