cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu
Log | Files | Refs | Submodules | LICENSE | sfeed.txt

9p.c (117331B)


      1/*
      2 * Virtio 9p backend
      3 *
      4 * Copyright IBM, Corp. 2010
      5 *
      6 * Authors:
      7 *  Anthony Liguori   <aliguori@us.ibm.com>
      8 *
      9 * This work is licensed under the terms of the GNU GPL, version 2.  See
     10 * the COPYING file in the top-level directory.
     11 *
     12 */
     13
     14/*
     15 * Not so fast! You might want to read the 9p developer docs first:
     16 * https://wiki.qemu.org/Documentation/9p
     17 */
     18
     19#include "qemu/osdep.h"
     20#include <glib/gprintf.h>
     21#include "hw/virtio/virtio.h"
     22#include "qapi/error.h"
     23#include "qemu/error-report.h"
     24#include "qemu/iov.h"
     25#include "qemu/main-loop.h"
     26#include "qemu/sockets.h"
     27#include "virtio-9p.h"
     28#include "fsdev/qemu-fsdev.h"
     29#include "9p-xattr.h"
     30#include "coth.h"
     31#include "trace.h"
     32#include "migration/blocker.h"
     33#include "qemu/xxhash.h"
     34#include <math.h>
     35#include <linux/limits.h>
     36
     37int open_fd_hw;
     38int total_open_fd;
     39static int open_fd_rc;
     40
     41enum {
     42    Oread   = 0x00,
     43    Owrite  = 0x01,
     44    Ordwr   = 0x02,
     45    Oexec   = 0x03,
     46    Oexcl   = 0x04,
     47    Otrunc  = 0x10,
     48    Orexec  = 0x20,
     49    Orclose = 0x40,
     50    Oappend = 0x80,
     51};
     52
     53static ssize_t pdu_marshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
     54{
     55    ssize_t ret;
     56    va_list ap;
     57
     58    va_start(ap, fmt);
     59    ret = pdu->s->transport->pdu_vmarshal(pdu, offset, fmt, ap);
     60    va_end(ap);
     61
     62    return ret;
     63}
     64
     65static ssize_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
     66{
     67    ssize_t ret;
     68    va_list ap;
     69
     70    va_start(ap, fmt);
     71    ret = pdu->s->transport->pdu_vunmarshal(pdu, offset, fmt, ap);
     72    va_end(ap);
     73
     74    return ret;
     75}
     76
     77static int omode_to_uflags(int8_t mode)
     78{
     79    int ret = 0;
     80
     81    switch (mode & 3) {
     82    case Oread:
     83        ret = O_RDONLY;
     84        break;
     85    case Ordwr:
     86        ret = O_RDWR;
     87        break;
     88    case Owrite:
     89        ret = O_WRONLY;
     90        break;
     91    case Oexec:
     92        ret = O_RDONLY;
     93        break;
     94    }
     95
     96    if (mode & Otrunc) {
     97        ret |= O_TRUNC;
     98    }
     99
    100    if (mode & Oappend) {
    101        ret |= O_APPEND;
    102    }
    103
    104    if (mode & Oexcl) {
    105        ret |= O_EXCL;
    106    }
    107
    108    return ret;
    109}
    110
    111typedef struct DotlOpenflagMap {
    112    int dotl_flag;
    113    int open_flag;
    114} DotlOpenflagMap;
    115
    116static int dotl_to_open_flags(int flags)
    117{
    118    int i;
    119    /*
    120     * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY
    121     * and P9_DOTL_NOACCESS
    122     */
    123    int oflags = flags & O_ACCMODE;
    124
    125    DotlOpenflagMap dotl_oflag_map[] = {
    126        { P9_DOTL_CREATE, O_CREAT },
    127        { P9_DOTL_EXCL, O_EXCL },
    128        { P9_DOTL_NOCTTY , O_NOCTTY },
    129        { P9_DOTL_TRUNC, O_TRUNC },
    130        { P9_DOTL_APPEND, O_APPEND },
    131        { P9_DOTL_NONBLOCK, O_NONBLOCK } ,
    132        { P9_DOTL_DSYNC, O_DSYNC },
    133        { P9_DOTL_FASYNC, FASYNC },
    134        { P9_DOTL_DIRECT, O_DIRECT },
    135        { P9_DOTL_LARGEFILE, O_LARGEFILE },
    136        { P9_DOTL_DIRECTORY, O_DIRECTORY },
    137        { P9_DOTL_NOFOLLOW, O_NOFOLLOW },
    138        { P9_DOTL_NOATIME, O_NOATIME },
    139        { P9_DOTL_SYNC, O_SYNC },
    140    };
    141
    142    for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) {
    143        if (flags & dotl_oflag_map[i].dotl_flag) {
    144            oflags |= dotl_oflag_map[i].open_flag;
    145        }
    146    }
    147
    148    return oflags;
    149}
    150
    151void cred_init(FsCred *credp)
    152{
    153    credp->fc_uid = -1;
    154    credp->fc_gid = -1;
    155    credp->fc_mode = -1;
    156    credp->fc_rdev = -1;
    157}
    158
    159static int get_dotl_openflags(V9fsState *s, int oflags)
    160{
    161    int flags;
    162    /*
    163     * Filter the client open flags
    164     */
    165    flags = dotl_to_open_flags(oflags);
    166    flags &= ~(O_NOCTTY | O_ASYNC | O_CREAT);
    167    /*
    168     * Ignore direct disk access hint until the server supports it.
    169     */
    170    flags &= ~O_DIRECT;
    171    return flags;
    172}
    173
    174void v9fs_path_init(V9fsPath *path)
    175{
    176    path->data = NULL;
    177    path->size = 0;
    178}
    179
    180void v9fs_path_free(V9fsPath *path)
    181{
    182    g_free(path->data);
    183    path->data = NULL;
    184    path->size = 0;
    185}
    186
    187
    188void GCC_FMT_ATTR(2, 3)
    189v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...)
    190{
    191    va_list ap;
    192
    193    v9fs_path_free(path);
    194
    195    va_start(ap, fmt);
    196    /* Bump the size for including terminating NULL */
    197    path->size = g_vasprintf(&path->data, fmt, ap) + 1;
    198    va_end(ap);
    199}
    200
    201void v9fs_path_copy(V9fsPath *dst, const V9fsPath *src)
    202{
    203    v9fs_path_free(dst);
    204    dst->size = src->size;
    205    dst->data = g_memdup(src->data, src->size);
    206}
    207
    208int v9fs_name_to_path(V9fsState *s, V9fsPath *dirpath,
    209                      const char *name, V9fsPath *path)
    210{
    211    int err;
    212    err = s->ops->name_to_path(&s->ctx, dirpath, name, path);
    213    if (err < 0) {
    214        err = -errno;
    215    }
    216    return err;
    217}
    218
    219/*
    220 * Return TRUE if s1 is an ancestor of s2.
    221 *
    222 * E.g. "a/b" is an ancestor of "a/b/c" but not of "a/bc/d".
    223 * As a special case, We treat s1 as ancestor of s2 if they are same!
    224 */
    225static int v9fs_path_is_ancestor(V9fsPath *s1, V9fsPath *s2)
    226{
    227    if (!strncmp(s1->data, s2->data, s1->size - 1)) {
    228        if (s2->data[s1->size - 1] == '\0' || s2->data[s1->size - 1] == '/') {
    229            return 1;
    230        }
    231    }
    232    return 0;
    233}
    234
    235static size_t v9fs_string_size(V9fsString *str)
    236{
    237    return str->size;
    238}
    239
    240/*
    241 * returns 0 if fid got re-opened, 1 if not, < 0 on error */
    242static int coroutine_fn v9fs_reopen_fid(V9fsPDU *pdu, V9fsFidState *f)
    243{
    244    int err = 1;
    245    if (f->fid_type == P9_FID_FILE) {
    246        if (f->fs.fd == -1) {
    247            do {
    248                err = v9fs_co_open(pdu, f, f->open_flags);
    249            } while (err == -EINTR && !pdu->cancelled);
    250        }
    251    } else if (f->fid_type == P9_FID_DIR) {
    252        if (f->fs.dir.stream == NULL) {
    253            do {
    254                err = v9fs_co_opendir(pdu, f);
    255            } while (err == -EINTR && !pdu->cancelled);
    256        }
    257    }
    258    return err;
    259}
    260
    261static V9fsFidState *coroutine_fn get_fid(V9fsPDU *pdu, int32_t fid)
    262{
    263    int err;
    264    V9fsFidState *f;
    265    V9fsState *s = pdu->s;
    266
    267    QSIMPLEQ_FOREACH(f, &s->fid_list, next) {
    268        BUG_ON(f->clunked);
    269        if (f->fid == fid) {
    270            /*
    271             * Update the fid ref upfront so that
    272             * we don't get reclaimed when we yield
    273             * in open later.
    274             */
    275            f->ref++;
    276            /*
    277             * check whether we need to reopen the
    278             * file. We might have closed the fd
    279             * while trying to free up some file
    280             * descriptors.
    281             */
    282            err = v9fs_reopen_fid(pdu, f);
    283            if (err < 0) {
    284                f->ref--;
    285                return NULL;
    286            }
    287            /*
    288             * Mark the fid as referenced so that the LRU
    289             * reclaim won't close the file descriptor
    290             */
    291            f->flags |= FID_REFERENCED;
    292            return f;
    293        }
    294    }
    295    return NULL;
    296}
    297
    298static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid)
    299{
    300    V9fsFidState *f;
    301
    302    QSIMPLEQ_FOREACH(f, &s->fid_list, next) {
    303        /* If fid is already there return NULL */
    304        BUG_ON(f->clunked);
    305        if (f->fid == fid) {
    306            return NULL;
    307        }
    308    }
    309    f = g_malloc0(sizeof(V9fsFidState));
    310    f->fid = fid;
    311    f->fid_type = P9_FID_NONE;
    312    f->ref = 1;
    313    /*
    314     * Mark the fid as referenced so that the LRU
    315     * reclaim won't close the file descriptor
    316     */
    317    f->flags |= FID_REFERENCED;
    318    QSIMPLEQ_INSERT_TAIL(&s->fid_list, f, next);
    319
    320    v9fs_readdir_init(s->proto_version, &f->fs.dir);
    321    v9fs_readdir_init(s->proto_version, &f->fs_reclaim.dir);
    322
    323    return f;
    324}
    325
    326static int coroutine_fn v9fs_xattr_fid_clunk(V9fsPDU *pdu, V9fsFidState *fidp)
    327{
    328    int retval = 0;
    329
    330    if (fidp->fs.xattr.xattrwalk_fid) {
    331        /* getxattr/listxattr fid */
    332        goto free_value;
    333    }
    334    /*
    335     * if this is fid for setxattr. clunk should
    336     * result in setxattr localcall
    337     */
    338    if (fidp->fs.xattr.len != fidp->fs.xattr.copied_len) {
    339        /* clunk after partial write */
    340        retval = -EINVAL;
    341        goto free_out;
    342    }
    343    if (fidp->fs.xattr.len) {
    344        retval = v9fs_co_lsetxattr(pdu, &fidp->path, &fidp->fs.xattr.name,
    345                                   fidp->fs.xattr.value,
    346                                   fidp->fs.xattr.len,
    347                                   fidp->fs.xattr.flags);
    348    } else {
    349        retval = v9fs_co_lremovexattr(pdu, &fidp->path, &fidp->fs.xattr.name);
    350    }
    351free_out:
    352    v9fs_string_free(&fidp->fs.xattr.name);
    353free_value:
    354    g_free(fidp->fs.xattr.value);
    355    return retval;
    356}
    357
    358static int coroutine_fn free_fid(V9fsPDU *pdu, V9fsFidState *fidp)
    359{
    360    int retval = 0;
    361
    362    if (fidp->fid_type == P9_FID_FILE) {
    363        /* If we reclaimed the fd no need to close */
    364        if (fidp->fs.fd != -1) {
    365            retval = v9fs_co_close(pdu, &fidp->fs);
    366        }
    367    } else if (fidp->fid_type == P9_FID_DIR) {
    368        if (fidp->fs.dir.stream != NULL) {
    369            retval = v9fs_co_closedir(pdu, &fidp->fs);
    370        }
    371    } else if (fidp->fid_type == P9_FID_XATTR) {
    372        retval = v9fs_xattr_fid_clunk(pdu, fidp);
    373    }
    374    v9fs_path_free(&fidp->path);
    375    g_free(fidp);
    376    return retval;
    377}
    378
    379static int coroutine_fn put_fid(V9fsPDU *pdu, V9fsFidState *fidp)
    380{
    381    BUG_ON(!fidp->ref);
    382    fidp->ref--;
    383    /*
    384     * Don't free the fid if it is in reclaim list
    385     */
    386    if (!fidp->ref && fidp->clunked) {
    387        if (fidp->fid == pdu->s->root_fid) {
    388            /*
    389             * if the clunked fid is root fid then we
    390             * have unmounted the fs on the client side.
    391             * delete the migration blocker. Ideally, this
    392             * should be hooked to transport close notification
    393             */
    394            if (pdu->s->migration_blocker) {
    395                migrate_del_blocker(pdu->s->migration_blocker);
    396                error_free(pdu->s->migration_blocker);
    397                pdu->s->migration_blocker = NULL;
    398            }
    399        }
    400        return free_fid(pdu, fidp);
    401    }
    402    return 0;
    403}
    404
    405static V9fsFidState *clunk_fid(V9fsState *s, int32_t fid)
    406{
    407    V9fsFidState *fidp;
    408
    409    QSIMPLEQ_FOREACH(fidp, &s->fid_list, next) {
    410        if (fidp->fid == fid) {
    411            QSIMPLEQ_REMOVE(&s->fid_list, fidp, V9fsFidState, next);
    412            fidp->clunked = true;
    413            return fidp;
    414        }
    415    }
    416    return NULL;
    417}
    418
    419void coroutine_fn v9fs_reclaim_fd(V9fsPDU *pdu)
    420{
    421    int reclaim_count = 0;
    422    V9fsState *s = pdu->s;
    423    V9fsFidState *f;
    424    QSLIST_HEAD(, V9fsFidState) reclaim_list =
    425        QSLIST_HEAD_INITIALIZER(reclaim_list);
    426
    427    QSIMPLEQ_FOREACH(f, &s->fid_list, next) {
    428        /*
    429         * Unlink fids cannot be reclaimed. Check
    430         * for them and skip them. Also skip fids
    431         * currently being operated on.
    432         */
    433        if (f->ref || f->flags & FID_NON_RECLAIMABLE) {
    434            continue;
    435        }
    436        /*
    437         * if it is a recently referenced fid
    438         * we leave the fid untouched and clear the
    439         * reference bit. We come back to it later
    440         * in the next iteration. (a simple LRU without
    441         * moving list elements around)
    442         */
    443        if (f->flags & FID_REFERENCED) {
    444            f->flags &= ~FID_REFERENCED;
    445            continue;
    446        }
    447        /*
    448         * Add fids to reclaim list.
    449         */
    450        if (f->fid_type == P9_FID_FILE) {
    451            if (f->fs.fd != -1) {
    452                /*
    453                 * Up the reference count so that
    454                 * a clunk request won't free this fid
    455                 */
    456                f->ref++;
    457                QSLIST_INSERT_HEAD(&reclaim_list, f, reclaim_next);
    458                f->fs_reclaim.fd = f->fs.fd;
    459                f->fs.fd = -1;
    460                reclaim_count++;
    461            }
    462        } else if (f->fid_type == P9_FID_DIR) {
    463            if (f->fs.dir.stream != NULL) {
    464                /*
    465                 * Up the reference count so that
    466                 * a clunk request won't free this fid
    467                 */
    468                f->ref++;
    469                QSLIST_INSERT_HEAD(&reclaim_list, f, reclaim_next);
    470                f->fs_reclaim.dir.stream = f->fs.dir.stream;
    471                f->fs.dir.stream = NULL;
    472                reclaim_count++;
    473            }
    474        }
    475        if (reclaim_count >= open_fd_rc) {
    476            break;
    477        }
    478    }
    479    /*
    480     * Now close the fid in reclaim list. Free them if they
    481     * are already clunked.
    482     */
    483    while (!QSLIST_EMPTY(&reclaim_list)) {
    484        f = QSLIST_FIRST(&reclaim_list);
    485        QSLIST_REMOVE(&reclaim_list, f, V9fsFidState, reclaim_next);
    486        if (f->fid_type == P9_FID_FILE) {
    487            v9fs_co_close(pdu, &f->fs_reclaim);
    488        } else if (f->fid_type == P9_FID_DIR) {
    489            v9fs_co_closedir(pdu, &f->fs_reclaim);
    490        }
    491        /*
    492         * Now drop the fid reference, free it
    493         * if clunked.
    494         */
    495        put_fid(pdu, f);
    496    }
    497}
    498
    499static int coroutine_fn v9fs_mark_fids_unreclaim(V9fsPDU *pdu, V9fsPath *path)
    500{
    501    int err;
    502    V9fsState *s = pdu->s;
    503    V9fsFidState *fidp, *fidp_next;
    504
    505    fidp = QSIMPLEQ_FIRST(&s->fid_list);
    506    if (!fidp) {
    507        return 0;
    508    }
    509
    510    /*
    511     * v9fs_reopen_fid() can yield : a reference on the fid must be held
    512     * to ensure its pointer remains valid and we can safely pass it to
    513     * QSIMPLEQ_NEXT(). The corresponding put_fid() can also yield so
    514     * we must keep a reference on the next fid as well. So the logic here
    515     * is to get a reference on a fid and only put it back during the next
    516     * iteration after we could get a reference on the next fid. Start with
    517     * the first one.
    518     */
    519    for (fidp->ref++; fidp; fidp = fidp_next) {
    520        if (fidp->path.size == path->size &&
    521            !memcmp(fidp->path.data, path->data, path->size)) {
    522            /* Mark the fid non reclaimable. */
    523            fidp->flags |= FID_NON_RECLAIMABLE;
    524
    525            /* reopen the file/dir if already closed */
    526            err = v9fs_reopen_fid(pdu, fidp);
    527            if (err < 0) {
    528                put_fid(pdu, fidp);
    529                return err;
    530            }
    531        }
    532
    533        fidp_next = QSIMPLEQ_NEXT(fidp, next);
    534
    535        if (fidp_next) {
    536            /*
    537             * Ensure the next fid survives a potential clunk request during
    538             * put_fid() below and v9fs_reopen_fid() in the next iteration.
    539             */
    540            fidp_next->ref++;
    541        }
    542
    543        /* We're done with this fid */
    544        put_fid(pdu, fidp);
    545    }
    546
    547    return 0;
    548}
    549
    550static void coroutine_fn virtfs_reset(V9fsPDU *pdu)
    551{
    552    V9fsState *s = pdu->s;
    553    V9fsFidState *fidp;
    554
    555    /* Free all fids */
    556    while (!QSIMPLEQ_EMPTY(&s->fid_list)) {
    557        /* Get fid */
    558        fidp = QSIMPLEQ_FIRST(&s->fid_list);
    559        fidp->ref++;
    560
    561        /* Clunk fid */
    562        QSIMPLEQ_REMOVE(&s->fid_list, fidp, V9fsFidState, next);
    563        fidp->clunked = true;
    564
    565        put_fid(pdu, fidp);
    566    }
    567}
    568
    569#define P9_QID_TYPE_DIR         0x80
    570#define P9_QID_TYPE_SYMLINK     0x02
    571
    572#define P9_STAT_MODE_DIR        0x80000000
    573#define P9_STAT_MODE_APPEND     0x40000000
    574#define P9_STAT_MODE_EXCL       0x20000000
    575#define P9_STAT_MODE_MOUNT      0x10000000
    576#define P9_STAT_MODE_AUTH       0x08000000
    577#define P9_STAT_MODE_TMP        0x04000000
    578#define P9_STAT_MODE_SYMLINK    0x02000000
    579#define P9_STAT_MODE_LINK       0x01000000
    580#define P9_STAT_MODE_DEVICE     0x00800000
    581#define P9_STAT_MODE_NAMED_PIPE 0x00200000
    582#define P9_STAT_MODE_SOCKET     0x00100000
    583#define P9_STAT_MODE_SETUID     0x00080000
    584#define P9_STAT_MODE_SETGID     0x00040000
    585#define P9_STAT_MODE_SETVTX     0x00010000
    586
    587#define P9_STAT_MODE_TYPE_BITS (P9_STAT_MODE_DIR |          \
    588                                P9_STAT_MODE_SYMLINK |      \
    589                                P9_STAT_MODE_LINK |         \
    590                                P9_STAT_MODE_DEVICE |       \
    591                                P9_STAT_MODE_NAMED_PIPE |   \
    592                                P9_STAT_MODE_SOCKET)
    593
    594/* Mirrors all bits of a byte. So e.g. binary 10100000 would become 00000101. */
    595static inline uint8_t mirror8bit(uint8_t byte)
    596{
    597    return (byte * 0x0202020202ULL & 0x010884422010ULL) % 1023;
    598}
    599
    600/* Same as mirror8bit() just for a 64 bit data type instead for a byte. */
    601static inline uint64_t mirror64bit(uint64_t value)
    602{
    603    return ((uint64_t)mirror8bit(value         & 0xff) << 56) |
    604           ((uint64_t)mirror8bit((value >> 8)  & 0xff) << 48) |
    605           ((uint64_t)mirror8bit((value >> 16) & 0xff) << 40) |
    606           ((uint64_t)mirror8bit((value >> 24) & 0xff) << 32) |
    607           ((uint64_t)mirror8bit((value >> 32) & 0xff) << 24) |
    608           ((uint64_t)mirror8bit((value >> 40) & 0xff) << 16) |
    609           ((uint64_t)mirror8bit((value >> 48) & 0xff) << 8)  |
    610           ((uint64_t)mirror8bit((value >> 56) & 0xff));
    611}
    612
    613/**
    614 * @brief Parameter k for the Exponential Golomb algorithm to be used.
    615 *
    616 * The smaller this value, the smaller the minimum bit count for the Exp.
    617 * Golomb generated affixes will be (at lowest index) however for the
    618 * price of having higher maximum bit count of generated affixes (at highest
    619 * index). Likewise increasing this parameter yields in smaller maximum bit
    620 * count for the price of having higher minimum bit count.
    621 *
    622 * In practice that means: a good value for k depends on the expected amount
    623 * of devices to be exposed by one export. For a small amount of devices k
    624 * should be small, for a large amount of devices k might be increased
    625 * instead. The default of k=0 should be fine for most users though.
    626 *
    627 * @b IMPORTANT: In case this ever becomes a runtime parameter; the value of
    628 * k should not change as long as guest is still running! Because that would
    629 * cause completely different inode numbers to be generated on guest.
    630 */
    631#define EXP_GOLOMB_K    0
    632
    633/**
    634 * @brief Exponential Golomb algorithm for arbitrary k (including k=0).
    635 *
    636 * The Exponential Golomb algorithm generates @b prefixes (@b not suffixes!)
    637 * with growing length and with the mathematical property of being
    638 * "prefix-free". The latter means the generated prefixes can be prepended
    639 * in front of arbitrary numbers and the resulting concatenated numbers are
    640 * guaranteed to be always unique.
    641 *
    642 * This is a minor adjustment to the original Exp. Golomb algorithm in the
    643 * sense that lowest allowed index (@param n) starts with 1, not with zero.
    644 *
    645 * @param n - natural number (or index) of the prefix to be generated
    646 *            (1, 2, 3, ...)
    647 * @param k - parameter k of Exp. Golomb algorithm to be used
    648 *            (see comment on EXP_GOLOMB_K macro for details about k)
    649 */
    650static VariLenAffix expGolombEncode(uint64_t n, int k)
    651{
    652    const uint64_t value = n + (1 << k) - 1;
    653    const int bits = (int) log2(value) + 1;
    654    return (VariLenAffix) {
    655        .type = AffixType_Prefix,
    656        .value = value,
    657        .bits = bits + MAX((bits - 1 - k), 0)
    658    };
    659}
    660
    661/**
    662 * @brief Converts a suffix into a prefix, or a prefix into a suffix.
    663 *
    664 * Simply mirror all bits of the affix value, for the purpose to preserve
    665 * respectively the mathematical "prefix-free" or "suffix-free" property
    666 * after the conversion.
    667 *
    668 * If a passed prefix is suitable to create unique numbers, then the
    669 * returned suffix is suitable to create unique numbers as well (and vice
    670 * versa).
    671 */
    672static VariLenAffix invertAffix(const VariLenAffix *affix)
    673{
    674    return (VariLenAffix) {
    675        .type =
    676            (affix->type == AffixType_Suffix) ?
    677                AffixType_Prefix : AffixType_Suffix,
    678        .value =
    679            mirror64bit(affix->value) >>
    680            ((sizeof(affix->value) * 8) - affix->bits),
    681        .bits = affix->bits
    682    };
    683}
    684
    685/**
    686 * @brief Generates suffix numbers with "suffix-free" property.
    687 *
    688 * This is just a wrapper function on top of the Exp. Golomb algorithm.
    689 *
    690 * Since the Exp. Golomb algorithm generates prefixes, but we need suffixes,
    691 * this function converts the Exp. Golomb prefixes into appropriate suffixes
    692 * which are still suitable for generating unique numbers.
    693 *
    694 * @param n - natural number (or index) of the suffix to be generated
    695 *            (1, 2, 3, ...)
    696 */
    697static VariLenAffix affixForIndex(uint64_t index)
    698{
    699    VariLenAffix prefix;
    700    prefix = expGolombEncode(index, EXP_GOLOMB_K);
    701    return invertAffix(&prefix); /* convert prefix to suffix */
    702}
    703
    704/* creative abuse of tb_hash_func7, which is based on xxhash */
    705static uint32_t qpp_hash(QppEntry e)
    706{
    707    return qemu_xxhash7(e.ino_prefix, e.dev, 0, 0, 0);
    708}
    709
    710static uint32_t qpf_hash(QpfEntry e)
    711{
    712    return qemu_xxhash7(e.ino, e.dev, 0, 0, 0);
    713}
    714
    715static bool qpd_cmp_func(const void *obj, const void *userp)
    716{
    717    const QpdEntry *e1 = obj, *e2 = userp;
    718    return e1->dev == e2->dev;
    719}
    720
    721static bool qpp_cmp_func(const void *obj, const void *userp)
    722{
    723    const QppEntry *e1 = obj, *e2 = userp;
    724    return e1->dev == e2->dev && e1->ino_prefix == e2->ino_prefix;
    725}
    726
    727static bool qpf_cmp_func(const void *obj, const void *userp)
    728{
    729    const QpfEntry *e1 = obj, *e2 = userp;
    730    return e1->dev == e2->dev && e1->ino == e2->ino;
    731}
    732
    733static void qp_table_remove(void *p, uint32_t h, void *up)
    734{
    735    g_free(p);
    736}
    737
    738static void qp_table_destroy(struct qht *ht)
    739{
    740    if (!ht || !ht->map) {
    741        return;
    742    }
    743    qht_iter(ht, qp_table_remove, NULL);
    744    qht_destroy(ht);
    745}
    746
    747static void qpd_table_init(struct qht *ht)
    748{
    749    qht_init(ht, qpd_cmp_func, 1, QHT_MODE_AUTO_RESIZE);
    750}
    751
    752static void qpp_table_init(struct qht *ht)
    753{
    754    qht_init(ht, qpp_cmp_func, 1, QHT_MODE_AUTO_RESIZE);
    755}
    756
    757static void qpf_table_init(struct qht *ht)
    758{
    759    qht_init(ht, qpf_cmp_func, 1 << 16, QHT_MODE_AUTO_RESIZE);
    760}
    761
    762/*
    763 * Returns how many (high end) bits of inode numbers of the passed fs
    764 * device shall be used (in combination with the device number) to
    765 * generate hash values for qpp_table entries.
    766 *
    767 * This function is required if variable length suffixes are used for inode
    768 * number mapping on guest level. Since a device may end up having multiple
    769 * entries in qpp_table, each entry most probably with a different suffix
    770 * length, we thus need this function in conjunction with qpd_table to
    771 * "agree" about a fix amount of bits (per device) to be always used for
    772 * generating hash values for the purpose of accessing qpp_table in order
    773 * get consistent behaviour when accessing qpp_table.
    774 */
    775static int qid_inode_prefix_hash_bits(V9fsPDU *pdu, dev_t dev)
    776{
    777    QpdEntry lookup = {
    778        .dev = dev
    779    }, *val;
    780    uint32_t hash = dev;
    781    VariLenAffix affix;
    782
    783    val = qht_lookup(&pdu->s->qpd_table, &lookup, hash);
    784    if (!val) {
    785        val = g_malloc0(sizeof(QpdEntry));
    786        *val = lookup;
    787        affix = affixForIndex(pdu->s->qp_affix_next);
    788        val->prefix_bits = affix.bits;
    789        qht_insert(&pdu->s->qpd_table, val, hash, NULL);
    790        pdu->s->qp_ndevices++;
    791    }
    792    return val->prefix_bits;
    793}
    794
    795/**
    796 * @brief Slow / full mapping host inode nr -> guest inode nr.
    797 *
    798 * This function performs a slower and much more costly remapping of an
    799 * original file inode number on host to an appropriate different inode
    800 * number on guest. For every (dev, inode) combination on host a new
    801 * sequential number is generated, cached and exposed as inode number on
    802 * guest.
    803 *
    804 * This is just a "last resort" fallback solution if the much faster/cheaper
    805 * qid_path_suffixmap() failed. In practice this slow / full mapping is not
    806 * expected ever to be used at all though.
    807 *
    808 * @see qid_path_suffixmap() for details
    809 *
    810 */
    811static int qid_path_fullmap(V9fsPDU *pdu, const struct stat *stbuf,
    812                            uint64_t *path)
    813{
    814    QpfEntry lookup = {
    815        .dev = stbuf->st_dev,
    816        .ino = stbuf->st_ino
    817    }, *val;
    818    uint32_t hash = qpf_hash(lookup);
    819    VariLenAffix affix;
    820
    821    val = qht_lookup(&pdu->s->qpf_table, &lookup, hash);
    822
    823    if (!val) {
    824        if (pdu->s->qp_fullpath_next == 0) {
    825            /* no more files can be mapped :'( */
    826            error_report_once(
    827                "9p: No more prefixes available for remapping inodes from "
    828                "host to guest."
    829            );
    830            return -ENFILE;
    831        }
    832
    833        val = g_malloc0(sizeof(QppEntry));
    834        *val = lookup;
    835
    836        /* new unique inode and device combo */
    837        affix = affixForIndex(
    838            1ULL << (sizeof(pdu->s->qp_affix_next) * 8)
    839        );
    840        val->path = (pdu->s->qp_fullpath_next++ << affix.bits) | affix.value;
    841        pdu->s->qp_fullpath_next &= ((1ULL << (64 - affix.bits)) - 1);
    842        qht_insert(&pdu->s->qpf_table, val, hash, NULL);
    843    }
    844
    845    *path = val->path;
    846    return 0;
    847}
    848
    849/**
    850 * @brief Quick mapping host inode nr -> guest inode nr.
    851 *
    852 * This function performs quick remapping of an original file inode number
    853 * on host to an appropriate different inode number on guest. This remapping
    854 * of inodes is required to avoid inode nr collisions on guest which would
    855 * happen if the 9p export contains more than 1 exported file system (or
    856 * more than 1 file system data set), because unlike on host level where the
    857 * files would have different device nrs, all files exported by 9p would
    858 * share the same device nr on guest (the device nr of the virtual 9p device
    859 * that is).
    860 *
    861 * Inode remapping is performed by chopping off high end bits of the original
    862 * inode number from host, shifting the result upwards and then assigning a
    863 * generated suffix number for the low end bits, where the same suffix number
    864 * will be shared by all inodes with the same device id AND the same high end
    865 * bits that have been chopped off. That approach utilizes the fact that inode
    866 * numbers very likely share the same high end bits (i.e. due to their common
    867 * sequential generation by file systems) and hence we only have to generate
    868 * and track a very limited amount of suffixes in practice due to that.
    869 *
    870 * We generate variable size suffixes for that purpose. The 1st generated
    871 * suffix will only have 1 bit and hence we only need to chop off 1 bit from
    872 * the original inode number. The subsequent suffixes being generated will
    873 * grow in (bit) size subsequently, i.e. the 2nd and 3rd suffix being
    874 * generated will have 3 bits and hence we have to chop off 3 bits from their
    875 * original inodes, and so on. That approach of using variable length suffixes
    876 * (i.e. over fixed size ones) utilizes the fact that in practice only a very
    877 * limited amount of devices are shared by the same export (e.g. typically
    878 * less than 2 dozen devices per 9p export), so in practice we need to chop
    879 * off less bits than with fixed size prefixes and yet are flexible to add
    880 * new devices at runtime below host's export directory at any time without
    881 * having to reboot guest nor requiring to reconfigure guest for that. And due
    882 * to the very limited amount of original high end bits that we chop off that
    883 * way, the total amount of suffixes we need to generate is less than by using
    884 * fixed size prefixes and hence it also improves performance of the inode
    885 * remapping algorithm, and finally has the nice side effect that the inode
    886 * numbers on guest will be much smaller & human friendly. ;-)
    887 */
    888static int qid_path_suffixmap(V9fsPDU *pdu, const struct stat *stbuf,
    889                              uint64_t *path)
    890{
    891    const int ino_hash_bits = qid_inode_prefix_hash_bits(pdu, stbuf->st_dev);
    892    QppEntry lookup = {
    893        .dev = stbuf->st_dev,
    894        .ino_prefix = (uint16_t) (stbuf->st_ino >> (64 - ino_hash_bits))
    895    }, *val;
    896    uint32_t hash = qpp_hash(lookup);
    897
    898    val = qht_lookup(&pdu->s->qpp_table, &lookup, hash);
    899
    900    if (!val) {
    901        if (pdu->s->qp_affix_next == 0) {
    902            /* we ran out of affixes */
    903            warn_report_once(
    904                "9p: Potential degraded performance of inode remapping"
    905            );
    906            return -ENFILE;
    907        }
    908
    909        val = g_malloc0(sizeof(QppEntry));
    910        *val = lookup;
    911
    912        /* new unique inode affix and device combo */
    913        val->qp_affix_index = pdu->s->qp_affix_next++;
    914        val->qp_affix = affixForIndex(val->qp_affix_index);
    915        qht_insert(&pdu->s->qpp_table, val, hash, NULL);
    916    }
    917    /* assuming generated affix to be suffix type, not prefix */
    918    *path = (stbuf->st_ino << val->qp_affix.bits) | val->qp_affix.value;
    919    return 0;
    920}
    921
    922static int stat_to_qid(V9fsPDU *pdu, const struct stat *stbuf, V9fsQID *qidp)
    923{
    924    int err;
    925    size_t size;
    926
    927    if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
    928        /* map inode+device to qid path (fast path) */
    929        err = qid_path_suffixmap(pdu, stbuf, &qidp->path);
    930        if (err == -ENFILE) {
    931            /* fast path didn't work, fall back to full map */
    932            err = qid_path_fullmap(pdu, stbuf, &qidp->path);
    933        }
    934        if (err) {
    935            return err;
    936        }
    937    } else {
    938        if (pdu->s->dev_id != stbuf->st_dev) {
    939            if (pdu->s->ctx.export_flags & V9FS_FORBID_MULTIDEVS) {
    940                error_report_once(
    941                    "9p: Multiple devices detected in same VirtFS export. "
    942                    "Access of guest to additional devices is (partly) "
    943                    "denied due to virtfs option 'multidevs=forbid' being "
    944                    "effective."
    945                );
    946                return -ENODEV;
    947            } else {
    948                warn_report_once(
    949                    "9p: Multiple devices detected in same VirtFS export, "
    950                    "which might lead to file ID collisions and severe "
    951                    "misbehaviours on guest! You should either use a "
    952                    "separate export for each device shared from host or "
    953                    "use virtfs option 'multidevs=remap'!"
    954                );
    955            }
    956        }
    957        memset(&qidp->path, 0, sizeof(qidp->path));
    958        size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path));
    959        memcpy(&qidp->path, &stbuf->st_ino, size);
    960    }
    961
    962    qidp->version = stbuf->st_mtime ^ (stbuf->st_size << 8);
    963    qidp->type = 0;
    964    if (S_ISDIR(stbuf->st_mode)) {
    965        qidp->type |= P9_QID_TYPE_DIR;
    966    }
    967    if (S_ISLNK(stbuf->st_mode)) {
    968        qidp->type |= P9_QID_TYPE_SYMLINK;
    969    }
    970
    971    return 0;
    972}
    973
    974V9fsPDU *pdu_alloc(V9fsState *s)
    975{
    976    V9fsPDU *pdu = NULL;
    977
    978    if (!QLIST_EMPTY(&s->free_list)) {
    979        pdu = QLIST_FIRST(&s->free_list);
    980        QLIST_REMOVE(pdu, next);
    981        QLIST_INSERT_HEAD(&s->active_list, pdu, next);
    982    }
    983    return pdu;
    984}
    985
    986void pdu_free(V9fsPDU *pdu)
    987{
    988    V9fsState *s = pdu->s;
    989
    990    g_assert(!pdu->cancelled);
    991    QLIST_REMOVE(pdu, next);
    992    QLIST_INSERT_HEAD(&s->free_list, pdu, next);
    993}
    994
    995static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len)
    996{
    997    int8_t id = pdu->id + 1; /* Response */
    998    V9fsState *s = pdu->s;
    999    int ret;
   1000
   1001    /*
   1002     * The 9p spec requires that successfully cancelled pdus receive no reply.
   1003     * Sending a reply would confuse clients because they would
   1004     * assume that any EINTR is the actual result of the operation,
   1005     * rather than a consequence of the cancellation. However, if
   1006     * the operation completed (succesfully or with an error other
   1007     * than caused be cancellation), we do send out that reply, both
   1008     * for efficiency and to avoid confusing the rest of the state machine
   1009     * that assumes passing a non-error here will mean a successful
   1010     * transmission of the reply.
   1011     */
   1012    bool discard = pdu->cancelled && len == -EINTR;
   1013    if (discard) {
   1014        trace_v9fs_rcancel(pdu->tag, pdu->id);
   1015        pdu->size = 0;
   1016        goto out_notify;
   1017    }
   1018
   1019    if (len < 0) {
   1020        int err = -len;
   1021        len = 7;
   1022
   1023        if (s->proto_version != V9FS_PROTO_2000L) {
   1024            V9fsString str;
   1025
   1026            str.data = strerror(err);
   1027            str.size = strlen(str.data);
   1028
   1029            ret = pdu_marshal(pdu, len, "s", &str);
   1030            if (ret < 0) {
   1031                goto out_notify;
   1032            }
   1033            len += ret;
   1034            id = P9_RERROR;
   1035        }
   1036
   1037        ret = pdu_marshal(pdu, len, "d", err);
   1038        if (ret < 0) {
   1039            goto out_notify;
   1040        }
   1041        len += ret;
   1042
   1043        if (s->proto_version == V9FS_PROTO_2000L) {
   1044            id = P9_RLERROR;
   1045        }
   1046        trace_v9fs_rerror(pdu->tag, pdu->id, err); /* Trace ERROR */
   1047    }
   1048
   1049    /* fill out the header */
   1050    if (pdu_marshal(pdu, 0, "dbw", (int32_t)len, id, pdu->tag) < 0) {
   1051        goto out_notify;
   1052    }
   1053
   1054    /* keep these in sync */
   1055    pdu->size = len;
   1056    pdu->id = id;
   1057
   1058out_notify:
   1059    pdu->s->transport->push_and_notify(pdu);
   1060
   1061    /* Now wakeup anybody waiting in flush for this request */
   1062    if (!qemu_co_queue_next(&pdu->complete)) {
   1063        pdu_free(pdu);
   1064    }
   1065}
   1066
   1067static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension)
   1068{
   1069    mode_t ret;
   1070
   1071    ret = mode & 0777;
   1072    if (mode & P9_STAT_MODE_DIR) {
   1073        ret |= S_IFDIR;
   1074    }
   1075
   1076    if (mode & P9_STAT_MODE_SYMLINK) {
   1077        ret |= S_IFLNK;
   1078    }
   1079    if (mode & P9_STAT_MODE_SOCKET) {
   1080        ret |= S_IFSOCK;
   1081    }
   1082    if (mode & P9_STAT_MODE_NAMED_PIPE) {
   1083        ret |= S_IFIFO;
   1084    }
   1085    if (mode & P9_STAT_MODE_DEVICE) {
   1086        if (extension->size && extension->data[0] == 'c') {
   1087            ret |= S_IFCHR;
   1088        } else {
   1089            ret |= S_IFBLK;
   1090        }
   1091    }
   1092
   1093    if (!(ret & ~0777)) {
   1094        ret |= S_IFREG;
   1095    }
   1096
   1097    if (mode & P9_STAT_MODE_SETUID) {
   1098        ret |= S_ISUID;
   1099    }
   1100    if (mode & P9_STAT_MODE_SETGID) {
   1101        ret |= S_ISGID;
   1102    }
   1103    if (mode & P9_STAT_MODE_SETVTX) {
   1104        ret |= S_ISVTX;
   1105    }
   1106
   1107    return ret;
   1108}
   1109
   1110static int donttouch_stat(V9fsStat *stat)
   1111{
   1112    if (stat->type == -1 &&
   1113        stat->dev == -1 &&
   1114        stat->qid.type == 0xff &&
   1115        stat->qid.version == (uint32_t) -1 &&
   1116        stat->qid.path == (uint64_t) -1 &&
   1117        stat->mode == -1 &&
   1118        stat->atime == -1 &&
   1119        stat->mtime == -1 &&
   1120        stat->length == -1 &&
   1121        !stat->name.size &&
   1122        !stat->uid.size &&
   1123        !stat->gid.size &&
   1124        !stat->muid.size &&
   1125        stat->n_uid == -1 &&
   1126        stat->n_gid == -1 &&
   1127        stat->n_muid == -1) {
   1128        return 1;
   1129    }
   1130
   1131    return 0;
   1132}
   1133
   1134static void v9fs_stat_init(V9fsStat *stat)
   1135{
   1136    v9fs_string_init(&stat->name);
   1137    v9fs_string_init(&stat->uid);
   1138    v9fs_string_init(&stat->gid);
   1139    v9fs_string_init(&stat->muid);
   1140    v9fs_string_init(&stat->extension);
   1141}
   1142
   1143static void v9fs_stat_free(V9fsStat *stat)
   1144{
   1145    v9fs_string_free(&stat->name);
   1146    v9fs_string_free(&stat->uid);
   1147    v9fs_string_free(&stat->gid);
   1148    v9fs_string_free(&stat->muid);
   1149    v9fs_string_free(&stat->extension);
   1150}
   1151
   1152static uint32_t stat_to_v9mode(const struct stat *stbuf)
   1153{
   1154    uint32_t mode;
   1155
   1156    mode = stbuf->st_mode & 0777;
   1157    if (S_ISDIR(stbuf->st_mode)) {
   1158        mode |= P9_STAT_MODE_DIR;
   1159    }
   1160
   1161    if (S_ISLNK(stbuf->st_mode)) {
   1162        mode |= P9_STAT_MODE_SYMLINK;
   1163    }
   1164
   1165    if (S_ISSOCK(stbuf->st_mode)) {
   1166        mode |= P9_STAT_MODE_SOCKET;
   1167    }
   1168
   1169    if (S_ISFIFO(stbuf->st_mode)) {
   1170        mode |= P9_STAT_MODE_NAMED_PIPE;
   1171    }
   1172
   1173    if (S_ISBLK(stbuf->st_mode) || S_ISCHR(stbuf->st_mode)) {
   1174        mode |= P9_STAT_MODE_DEVICE;
   1175    }
   1176
   1177    if (stbuf->st_mode & S_ISUID) {
   1178        mode |= P9_STAT_MODE_SETUID;
   1179    }
   1180
   1181    if (stbuf->st_mode & S_ISGID) {
   1182        mode |= P9_STAT_MODE_SETGID;
   1183    }
   1184
   1185    if (stbuf->st_mode & S_ISVTX) {
   1186        mode |= P9_STAT_MODE_SETVTX;
   1187    }
   1188
   1189    return mode;
   1190}
   1191
   1192static int coroutine_fn stat_to_v9stat(V9fsPDU *pdu, V9fsPath *path,
   1193                                       const char *basename,
   1194                                       const struct stat *stbuf,
   1195                                       V9fsStat *v9stat)
   1196{
   1197    int err;
   1198
   1199    memset(v9stat, 0, sizeof(*v9stat));
   1200
   1201    err = stat_to_qid(pdu, stbuf, &v9stat->qid);
   1202    if (err < 0) {
   1203        return err;
   1204    }
   1205    v9stat->mode = stat_to_v9mode(stbuf);
   1206    v9stat->atime = stbuf->st_atime;
   1207    v9stat->mtime = stbuf->st_mtime;
   1208    v9stat->length = stbuf->st_size;
   1209
   1210    v9fs_string_free(&v9stat->uid);
   1211    v9fs_string_free(&v9stat->gid);
   1212    v9fs_string_free(&v9stat->muid);
   1213
   1214    v9stat->n_uid = stbuf->st_uid;
   1215    v9stat->n_gid = stbuf->st_gid;
   1216    v9stat->n_muid = 0;
   1217
   1218    v9fs_string_free(&v9stat->extension);
   1219
   1220    if (v9stat->mode & P9_STAT_MODE_SYMLINK) {
   1221        err = v9fs_co_readlink(pdu, path, &v9stat->extension);
   1222        if (err < 0) {
   1223            return err;
   1224        }
   1225    } else if (v9stat->mode & P9_STAT_MODE_DEVICE) {
   1226        v9fs_string_sprintf(&v9stat->extension, "%c %u %u",
   1227                S_ISCHR(stbuf->st_mode) ? 'c' : 'b',
   1228                major(stbuf->st_rdev), minor(stbuf->st_rdev));
   1229    } else if (S_ISDIR(stbuf->st_mode) || S_ISREG(stbuf->st_mode)) {
   1230        v9fs_string_sprintf(&v9stat->extension, "%s %lu",
   1231                "HARDLINKCOUNT", (unsigned long)stbuf->st_nlink);
   1232    }
   1233
   1234    v9fs_string_sprintf(&v9stat->name, "%s", basename);
   1235
   1236    v9stat->size = 61 +
   1237        v9fs_string_size(&v9stat->name) +
   1238        v9fs_string_size(&v9stat->uid) +
   1239        v9fs_string_size(&v9stat->gid) +
   1240        v9fs_string_size(&v9stat->muid) +
   1241        v9fs_string_size(&v9stat->extension);
   1242    return 0;
   1243}
   1244
   /* Bits used in the getattr request mask and st_result_mask. */
   #define P9_STATS_MODE          (1ULL << 0)
   #define P9_STATS_NLINK         (1ULL << 1)
   #define P9_STATS_UID           (1ULL << 2)
   #define P9_STATS_GID           (1ULL << 3)
   #define P9_STATS_RDEV          (1ULL << 4)
   #define P9_STATS_ATIME         (1ULL << 5)
   #define P9_STATS_MTIME         (1ULL << 6)
   #define P9_STATS_CTIME         (1ULL << 7)
   #define P9_STATS_INO           (1ULL << 8)
   #define P9_STATS_SIZE          (1ULL << 9)
   #define P9_STATS_BLOCKS        (1ULL << 10)

   #define P9_STATS_BTIME         (1ULL << 11)
   #define P9_STATS_GEN           (1ULL << 12)
   #define P9_STATS_DATA_VERSION  (1ULL << 13)

   /* Mask for fields up to BLOCKS (bits 0..10) */
   #define P9_STATS_BASIC         ((1ULL << 11) - 1)
   /* Mask for all fields above (bits 0..13) */
   #define P9_STATS_ALL           ((1ULL << 14) - 1)
   1263
   1264
   1265static int stat_to_v9stat_dotl(V9fsPDU *pdu, const struct stat *stbuf,
   1266                                V9fsStatDotl *v9lstat)
   1267{
   1268    memset(v9lstat, 0, sizeof(*v9lstat));
   1269
   1270    v9lstat->st_mode = stbuf->st_mode;
   1271    v9lstat->st_nlink = stbuf->st_nlink;
   1272    v9lstat->st_uid = stbuf->st_uid;
   1273    v9lstat->st_gid = stbuf->st_gid;
   1274    v9lstat->st_rdev = stbuf->st_rdev;
   1275    v9lstat->st_size = stbuf->st_size;
   1276    v9lstat->st_blksize = stbuf->st_blksize;
   1277    v9lstat->st_blocks = stbuf->st_blocks;
   1278    v9lstat->st_atime_sec = stbuf->st_atime;
   1279    v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec;
   1280    v9lstat->st_mtime_sec = stbuf->st_mtime;
   1281    v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec;
   1282    v9lstat->st_ctime_sec = stbuf->st_ctime;
   1283    v9lstat->st_ctime_nsec = stbuf->st_ctim.tv_nsec;
   1284    /* Currently we only support BASIC fields in stat */
   1285    v9lstat->st_result_mask = P9_STATS_BASIC;
   1286
   1287    return stat_to_qid(pdu, stbuf, &v9lstat->qid);
   1288}
   1289
   /*
    * Debug helper: print the @cnt entries of the scatter-gather list @sg to
    * stdout as "sg[cnt]: {(base, len), ...}".
    */
   static void print_sg(struct iovec *sg, int cnt)
   {
       int i;

       printf("sg[%d]: {", cnt);
       for (i = 0; i < cnt; i++) {
           if (i) {
               printf(", ");
           }
           /* iov_len is a size_t, hence %zu and not the signed %zd */
           printf("(%p, %zu)", sg[i].iov_base, sg[i].iov_len);
       }
       printf("}\n");
   }
   1303
   1304/* Will call this only for path name based fid */
   1305static void v9fs_fix_path(V9fsPath *dst, V9fsPath *src, int len)
   1306{
   1307    V9fsPath str;
   1308    v9fs_path_init(&str);
   1309    v9fs_path_copy(&str, dst);
   1310    v9fs_path_sprintf(dst, "%s%s", src->data, str.data + len);
   1311    v9fs_path_free(&str);
   1312}
   1313
   1314static inline bool is_ro_export(FsContext *ctx)
   1315{
   1316    return ctx->export_flags & V9FS_RDONLY;
   1317}
   1318
   1319static void coroutine_fn v9fs_version(void *opaque)
   1320{
   1321    ssize_t err;
   1322    V9fsPDU *pdu = opaque;
   1323    V9fsState *s = pdu->s;
   1324    V9fsString version;
   1325    size_t offset = 7;
   1326
   1327    v9fs_string_init(&version);
   1328    err = pdu_unmarshal(pdu, offset, "ds", &s->msize, &version);
   1329    if (err < 0) {
   1330        goto out;
   1331    }
   1332    trace_v9fs_version(pdu->tag, pdu->id, s->msize, version.data);
   1333
   1334    virtfs_reset(pdu);
   1335
   1336    if (!strcmp(version.data, "9P2000.u")) {
   1337        s->proto_version = V9FS_PROTO_2000U;
   1338    } else if (!strcmp(version.data, "9P2000.L")) {
   1339        s->proto_version = V9FS_PROTO_2000L;
   1340    } else {
   1341        v9fs_string_sprintf(&version, "unknown");
   1342        /* skip min. msize check, reporting invalid version has priority */
   1343        goto marshal;
   1344    }
   1345
   1346    if (s->msize < P9_MIN_MSIZE) {
   1347        err = -EMSGSIZE;
   1348        error_report(
   1349            "9pfs: Client requested msize < minimum msize ("
   1350            stringify(P9_MIN_MSIZE) ") supported by this server."
   1351        );
   1352        goto out;
   1353    }
   1354
   1355    /* 8192 is the default msize of Linux clients */
   1356    if (s->msize <= 8192 && !(s->ctx.export_flags & V9FS_NO_PERF_WARN)) {
   1357        warn_report_once(
   1358            "9p: degraded performance: a reasonable high msize should be "
   1359            "chosen on client/guest side (chosen msize is <= 8192). See "
   1360            "https://wiki.qemu.org/Documentation/9psetup#msize for details."
   1361        );
   1362    }
   1363
   1364marshal:
   1365    err = pdu_marshal(pdu, offset, "ds", s->msize, &version);
   1366    if (err < 0) {
   1367        goto out;
   1368    }
   1369    err += offset;
   1370    trace_v9fs_version_return(pdu->tag, pdu->id, s->msize, version.data);
   1371out:
   1372    pdu_complete(pdu, err);
   1373    v9fs_string_free(&version);
   1374}
   1375
   1376static void coroutine_fn v9fs_attach(void *opaque)
   1377{
   1378    V9fsPDU *pdu = opaque;
   1379    V9fsState *s = pdu->s;
   1380    int32_t fid, afid, n_uname;
   1381    V9fsString uname, aname;
   1382    V9fsFidState *fidp;
   1383    size_t offset = 7;
   1384    V9fsQID qid;
   1385    ssize_t err;
   1386    struct stat stbuf;
   1387
   1388    v9fs_string_init(&uname);
   1389    v9fs_string_init(&aname);
   1390    err = pdu_unmarshal(pdu, offset, "ddssd", &fid,
   1391                        &afid, &uname, &aname, &n_uname);
   1392    if (err < 0) {
   1393        goto out_nofid;
   1394    }
   1395    trace_v9fs_attach(pdu->tag, pdu->id, fid, afid, uname.data, aname.data);
   1396
   1397    fidp = alloc_fid(s, fid);
   1398    if (fidp == NULL) {
   1399        err = -EINVAL;
   1400        goto out_nofid;
   1401    }
   1402    fidp->uid = n_uname;
   1403    err = v9fs_co_name_to_path(pdu, NULL, "/", &fidp->path);
   1404    if (err < 0) {
   1405        err = -EINVAL;
   1406        clunk_fid(s, fid);
   1407        goto out;
   1408    }
   1409    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
   1410    if (err < 0) {
   1411        err = -EINVAL;
   1412        clunk_fid(s, fid);
   1413        goto out;
   1414    }
   1415    err = stat_to_qid(pdu, &stbuf, &qid);
   1416    if (err < 0) {
   1417        err = -EINVAL;
   1418        clunk_fid(s, fid);
   1419        goto out;
   1420    }
   1421
   1422    /*
   1423     * disable migration if we haven't done already.
   1424     * attach could get called multiple times for the same export.
   1425     */
   1426    if (!s->migration_blocker) {
   1427        error_setg(&s->migration_blocker,
   1428                   "Migration is disabled when VirtFS export path '%s' is mounted in the guest using mount_tag '%s'",
   1429                   s->ctx.fs_root ? s->ctx.fs_root : "NULL", s->tag);
   1430        err = migrate_add_blocker(s->migration_blocker, NULL);
   1431        if (err < 0) {
   1432            error_free(s->migration_blocker);
   1433            s->migration_blocker = NULL;
   1434            clunk_fid(s, fid);
   1435            goto out;
   1436        }
   1437        s->root_fid = fid;
   1438    }
   1439
   1440    err = pdu_marshal(pdu, offset, "Q", &qid);
   1441    if (err < 0) {
   1442        clunk_fid(s, fid);
   1443        goto out;
   1444    }
   1445    err += offset;
   1446
   1447    memcpy(&s->root_st, &stbuf, sizeof(stbuf));
   1448    trace_v9fs_attach_return(pdu->tag, pdu->id,
   1449                             qid.type, qid.version, qid.path);
   1450out:
   1451    put_fid(pdu, fidp);
   1452out_nofid:
   1453    pdu_complete(pdu, err);
   1454    v9fs_string_free(&uname);
   1455    v9fs_string_free(&aname);
   1456}
   1457
   1458static void coroutine_fn v9fs_stat(void *opaque)
   1459{
   1460    int32_t fid;
   1461    V9fsStat v9stat;
   1462    ssize_t err = 0;
   1463    size_t offset = 7;
   1464    struct stat stbuf;
   1465    V9fsFidState *fidp;
   1466    V9fsPDU *pdu = opaque;
   1467    char *basename;
   1468
   1469    err = pdu_unmarshal(pdu, offset, "d", &fid);
   1470    if (err < 0) {
   1471        goto out_nofid;
   1472    }
   1473    trace_v9fs_stat(pdu->tag, pdu->id, fid);
   1474
   1475    fidp = get_fid(pdu, fid);
   1476    if (fidp == NULL) {
   1477        err = -ENOENT;
   1478        goto out_nofid;
   1479    }
   1480    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
   1481    if (err < 0) {
   1482        goto out;
   1483    }
   1484    basename = g_path_get_basename(fidp->path.data);
   1485    err = stat_to_v9stat(pdu, &fidp->path, basename, &stbuf, &v9stat);
   1486    g_free(basename);
   1487    if (err < 0) {
   1488        goto out;
   1489    }
   1490    err = pdu_marshal(pdu, offset, "wS", 0, &v9stat);
   1491    if (err < 0) {
   1492        v9fs_stat_free(&v9stat);
   1493        goto out;
   1494    }
   1495    trace_v9fs_stat_return(pdu->tag, pdu->id, v9stat.mode,
   1496                           v9stat.atime, v9stat.mtime, v9stat.length);
   1497    err += offset;
   1498    v9fs_stat_free(&v9stat);
   1499out:
   1500    put_fid(pdu, fidp);
   1501out_nofid:
   1502    pdu_complete(pdu, err);
   1503}
   1504
   1505static void coroutine_fn v9fs_getattr(void *opaque)
   1506{
   1507    int32_t fid;
   1508    size_t offset = 7;
   1509    ssize_t retval = 0;
   1510    struct stat stbuf;
   1511    V9fsFidState *fidp;
   1512    uint64_t request_mask;
   1513    V9fsStatDotl v9stat_dotl;
   1514    V9fsPDU *pdu = opaque;
   1515
   1516    retval = pdu_unmarshal(pdu, offset, "dq", &fid, &request_mask);
   1517    if (retval < 0) {
   1518        goto out_nofid;
   1519    }
   1520    trace_v9fs_getattr(pdu->tag, pdu->id, fid, request_mask);
   1521
   1522    fidp = get_fid(pdu, fid);
   1523    if (fidp == NULL) {
   1524        retval = -ENOENT;
   1525        goto out_nofid;
   1526    }
   1527    /*
   1528     * Currently we only support BASIC fields in stat, so there is no
   1529     * need to look at request_mask.
   1530     */
   1531    retval = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
   1532    if (retval < 0) {
   1533        goto out;
   1534    }
   1535    retval = stat_to_v9stat_dotl(pdu, &stbuf, &v9stat_dotl);
   1536    if (retval < 0) {
   1537        goto out;
   1538    }
   1539
   1540    /*  fill st_gen if requested and supported by underlying fs */
   1541    if (request_mask & P9_STATS_GEN) {
   1542        retval = v9fs_co_st_gen(pdu, &fidp->path, stbuf.st_mode, &v9stat_dotl);
   1543        switch (retval) {
   1544        case 0:
   1545            /* we have valid st_gen: update result mask */
   1546            v9stat_dotl.st_result_mask |= P9_STATS_GEN;
   1547            break;
   1548        case -EINTR:
   1549            /* request cancelled, e.g. by Tflush */
   1550            goto out;
   1551        default:
   1552            /* failed to get st_gen: not fatal, ignore */
   1553            break;
   1554        }
   1555    }
   1556    retval = pdu_marshal(pdu, offset, "A", &v9stat_dotl);
   1557    if (retval < 0) {
   1558        goto out;
   1559    }
   1560    retval += offset;
   1561    trace_v9fs_getattr_return(pdu->tag, pdu->id, v9stat_dotl.st_result_mask,
   1562                              v9stat_dotl.st_mode, v9stat_dotl.st_uid,
   1563                              v9stat_dotl.st_gid);
   1564out:
   1565    put_fid(pdu, fidp);
   1566out_nofid:
   1567    pdu_complete(pdu, retval);
   1568}
   1569
   /* Attribute flags (bits of the 'valid' field of a setattr request) */
   #define P9_ATTR_MODE       0x001
   #define P9_ATTR_UID        0x002
   #define P9_ATTR_GID        0x004
   #define P9_ATTR_SIZE       0x008
   #define P9_ATTR_ATIME      0x010
   #define P9_ATTR_MTIME      0x020
   #define P9_ATTR_CTIME      0x040
   #define P9_ATTR_ATIME_SET  0x080
   #define P9_ATTR_MTIME_SET  0x100

   /* covers the flags MODE up to and including CTIME */
   #define P9_ATTR_MASK    0x7f
   1582
   1583static void coroutine_fn v9fs_setattr(void *opaque)
   1584{
   1585    int err = 0;
   1586    int32_t fid;
   1587    V9fsFidState *fidp;
   1588    size_t offset = 7;
   1589    V9fsIattr v9iattr;
   1590    V9fsPDU *pdu = opaque;
   1591
   1592    err = pdu_unmarshal(pdu, offset, "dI", &fid, &v9iattr);
   1593    if (err < 0) {
   1594        goto out_nofid;
   1595    }
   1596
   1597    trace_v9fs_setattr(pdu->tag, pdu->id, fid,
   1598                       v9iattr.valid, v9iattr.mode, v9iattr.uid, v9iattr.gid,
   1599                       v9iattr.size, v9iattr.atime_sec, v9iattr.mtime_sec);
   1600
   1601    fidp = get_fid(pdu, fid);
   1602    if (fidp == NULL) {
   1603        err = -EINVAL;
   1604        goto out_nofid;
   1605    }
   1606    if (v9iattr.valid & P9_ATTR_MODE) {
   1607        err = v9fs_co_chmod(pdu, &fidp->path, v9iattr.mode);
   1608        if (err < 0) {
   1609            goto out;
   1610        }
   1611    }
   1612    if (v9iattr.valid & (P9_ATTR_ATIME | P9_ATTR_MTIME)) {
   1613        struct timespec times[2];
   1614        if (v9iattr.valid & P9_ATTR_ATIME) {
   1615            if (v9iattr.valid & P9_ATTR_ATIME_SET) {
   1616                times[0].tv_sec = v9iattr.atime_sec;
   1617                times[0].tv_nsec = v9iattr.atime_nsec;
   1618            } else {
   1619                times[0].tv_nsec = UTIME_NOW;
   1620            }
   1621        } else {
   1622            times[0].tv_nsec = UTIME_OMIT;
   1623        }
   1624        if (v9iattr.valid & P9_ATTR_MTIME) {
   1625            if (v9iattr.valid & P9_ATTR_MTIME_SET) {
   1626                times[1].tv_sec = v9iattr.mtime_sec;
   1627                times[1].tv_nsec = v9iattr.mtime_nsec;
   1628            } else {
   1629                times[1].tv_nsec = UTIME_NOW;
   1630            }
   1631        } else {
   1632            times[1].tv_nsec = UTIME_OMIT;
   1633        }
   1634        err = v9fs_co_utimensat(pdu, &fidp->path, times);
   1635        if (err < 0) {
   1636            goto out;
   1637        }
   1638    }
   1639    /*
   1640     * If the only valid entry in iattr is ctime we can call
   1641     * chown(-1,-1) to update the ctime of the file
   1642     */
   1643    if ((v9iattr.valid & (P9_ATTR_UID | P9_ATTR_GID)) ||
   1644        ((v9iattr.valid & P9_ATTR_CTIME)
   1645         && !((v9iattr.valid & P9_ATTR_MASK) & ~P9_ATTR_CTIME))) {
   1646        if (!(v9iattr.valid & P9_ATTR_UID)) {
   1647            v9iattr.uid = -1;
   1648        }
   1649        if (!(v9iattr.valid & P9_ATTR_GID)) {
   1650            v9iattr.gid = -1;
   1651        }
   1652        err = v9fs_co_chown(pdu, &fidp->path, v9iattr.uid,
   1653                            v9iattr.gid);
   1654        if (err < 0) {
   1655            goto out;
   1656        }
   1657    }
   1658    if (v9iattr.valid & (P9_ATTR_SIZE)) {
   1659        err = v9fs_co_truncate(pdu, &fidp->path, v9iattr.size);
   1660        if (err < 0) {
   1661            goto out;
   1662        }
   1663    }
   1664    err = offset;
   1665    trace_v9fs_setattr_return(pdu->tag, pdu->id);
   1666out:
   1667    put_fid(pdu, fidp);
   1668out_nofid:
   1669    pdu_complete(pdu, err);
   1670}
   1671
   1672static int v9fs_walk_marshal(V9fsPDU *pdu, uint16_t nwnames, V9fsQID *qids)
   1673{
   1674    int i;
   1675    ssize_t err;
   1676    size_t offset = 7;
   1677
   1678    err = pdu_marshal(pdu, offset, "w", nwnames);
   1679    if (err < 0) {
   1680        return err;
   1681    }
   1682    offset += err;
   1683    for (i = 0; i < nwnames; i++) {
   1684        err = pdu_marshal(pdu, offset, "Q", &qids[i]);
   1685        if (err < 0) {
   1686            return err;
   1687        }
   1688        offset += err;
   1689    }
   1690    return offset;
   1691}
   1692
   /* A name is illegal if it is empty or contains a '/' character. */
   static bool name_is_illegal(const char *name)
   {
       if (name[0] == '\0') {
           return true;
       }
       return strchr(name, '/') != NULL;
   }
   1697
   /* Two stat results have the same ID if device and inode number match. */
   static bool same_stat_id(const struct stat *a, const struct stat *b)
   {
       if (a->st_dev != b->st_dev) {
           return false;
       }
       return a->st_ino == b->st_ino;
   }
   1702
   1703static void coroutine_fn v9fs_walk(void *opaque)
   1704{
   1705    int name_idx;
   1706    g_autofree V9fsQID *qids = NULL;
   1707    int i, err = 0;
   1708    V9fsPath dpath, path, *pathes = NULL;
   1709    uint16_t nwnames;
   1710    struct stat stbuf, fidst;
   1711    g_autofree struct stat *stbufs = NULL;
   1712    size_t offset = 7;
   1713    int32_t fid, newfid;
   1714    V9fsString *wnames = NULL;
   1715    V9fsFidState *fidp;
   1716    V9fsFidState *newfidp = NULL;
   1717    V9fsPDU *pdu = opaque;
   1718    V9fsState *s = pdu->s;
   1719    V9fsQID qid;
   1720
   1721    err = pdu_unmarshal(pdu, offset, "ddw", &fid, &newfid, &nwnames);
   1722    if (err < 0) {
   1723        pdu_complete(pdu, err);
   1724        return ;
   1725    }
   1726    offset += err;
   1727
   1728    trace_v9fs_walk(pdu->tag, pdu->id, fid, newfid, nwnames);
   1729
   1730    if (nwnames > P9_MAXWELEM) {
   1731        err = -EINVAL;
   1732        goto out_nofid;
   1733    }
   1734    if (nwnames) {
   1735        wnames = g_new0(V9fsString, nwnames);
   1736        qids   = g_new0(V9fsQID, nwnames);
   1737        stbufs = g_new0(struct stat, nwnames);
   1738        pathes = g_new0(V9fsPath, nwnames);
   1739        for (i = 0; i < nwnames; i++) {
   1740            err = pdu_unmarshal(pdu, offset, "s", &wnames[i]);
   1741            if (err < 0) {
   1742                goto out_nofid;
   1743            }
   1744            if (name_is_illegal(wnames[i].data)) {
   1745                err = -ENOENT;
   1746                goto out_nofid;
   1747            }
   1748            offset += err;
   1749        }
   1750    }
   1751    fidp = get_fid(pdu, fid);
   1752    if (fidp == NULL) {
   1753        err = -ENOENT;
   1754        goto out_nofid;
   1755    }
   1756
   1757    v9fs_path_init(&dpath);
   1758    v9fs_path_init(&path);
   1759    /*
   1760     * Both dpath and path initially point to fidp.
   1761     * Needed to handle request with nwnames == 0
   1762     */
   1763    v9fs_path_copy(&dpath, &fidp->path);
   1764    v9fs_path_copy(&path, &fidp->path);
   1765
   1766    /*
   1767     * To keep latency (i.e. overall execution time for processing this
   1768     * Twalk client request) as small as possible, run all the required fs
   1769     * driver code altogether inside the following block.
   1770     */
   1771    v9fs_co_run_in_worker({
   1772        if (v9fs_request_cancelled(pdu)) {
   1773            err = -EINTR;
   1774            break;
   1775        }
   1776        err = s->ops->lstat(&s->ctx, &dpath, &fidst);
   1777        if (err < 0) {
   1778            err = -errno;
   1779            break;
   1780        }
   1781        stbuf = fidst;
   1782        for (name_idx = 0; name_idx < nwnames; name_idx++) {
   1783            if (v9fs_request_cancelled(pdu)) {
   1784                err = -EINTR;
   1785                break;
   1786            }
   1787            if (!same_stat_id(&pdu->s->root_st, &stbuf) ||
   1788                strcmp("..", wnames[name_idx].data))
   1789            {
   1790                err = s->ops->name_to_path(&s->ctx, &dpath,
   1791                                           wnames[name_idx].data,
   1792                                           &pathes[name_idx]);
   1793                if (err < 0) {
   1794                    err = -errno;
   1795                    break;
   1796                }
   1797                if (v9fs_request_cancelled(pdu)) {
   1798                    err = -EINTR;
   1799                    break;
   1800                }
   1801                err = s->ops->lstat(&s->ctx, &pathes[name_idx], &stbuf);
   1802                if (err < 0) {
   1803                    err = -errno;
   1804                    break;
   1805                }
   1806                stbufs[name_idx] = stbuf;
   1807                v9fs_path_copy(&dpath, &pathes[name_idx]);
   1808            }
   1809        }
   1810    });
   1811    /*
   1812     * Handle all the rest of this Twalk request on main thread ...
   1813     */
   1814    if (err < 0) {
   1815        goto out;
   1816    }
   1817
   1818    err = stat_to_qid(pdu, &fidst, &qid);
   1819    if (err < 0) {
   1820        goto out;
   1821    }
   1822    stbuf = fidst;
   1823
   1824    /* reset dpath and path */
   1825    v9fs_path_copy(&dpath, &fidp->path);
   1826    v9fs_path_copy(&path, &fidp->path);
   1827
   1828    for (name_idx = 0; name_idx < nwnames; name_idx++) {
   1829        if (!same_stat_id(&pdu->s->root_st, &stbuf) ||
   1830            strcmp("..", wnames[name_idx].data))
   1831        {
   1832            stbuf = stbufs[name_idx];
   1833            err = stat_to_qid(pdu, &stbuf, &qid);
   1834            if (err < 0) {
   1835                goto out;
   1836            }
   1837            v9fs_path_copy(&path, &pathes[name_idx]);
   1838            v9fs_path_copy(&dpath, &path);
   1839        }
   1840        memcpy(&qids[name_idx], &qid, sizeof(qid));
   1841    }
   1842    if (fid == newfid) {
   1843        if (fidp->fid_type != P9_FID_NONE) {
   1844            err = -EINVAL;
   1845            goto out;
   1846        }
   1847        v9fs_path_write_lock(s);
   1848        v9fs_path_copy(&fidp->path, &path);
   1849        v9fs_path_unlock(s);
   1850    } else {
   1851        newfidp = alloc_fid(s, newfid);
   1852        if (newfidp == NULL) {
   1853            err = -EINVAL;
   1854            goto out;
   1855        }
   1856        newfidp->uid = fidp->uid;
   1857        v9fs_path_copy(&newfidp->path, &path);
   1858    }
   1859    err = v9fs_walk_marshal(pdu, nwnames, qids);
   1860    trace_v9fs_walk_return(pdu->tag, pdu->id, nwnames, qids);
   1861out:
   1862    put_fid(pdu, fidp);
   1863    if (newfidp) {
   1864        put_fid(pdu, newfidp);
   1865    }
   1866    v9fs_path_free(&dpath);
   1867    v9fs_path_free(&path);
   1868out_nofid:
   1869    pdu_complete(pdu, err);
   1870    if (nwnames && nwnames <= P9_MAXWELEM) {
   1871        for (name_idx = 0; name_idx < nwnames; name_idx++) {
   1872            v9fs_string_free(&wnames[name_idx]);
   1873            v9fs_path_free(&pathes[name_idx]);
   1874        }
   1875        g_free(wnames);
   1876        g_free(pathes);
   1877    }
   1878}
   1879
   1880static int32_t coroutine_fn get_iounit(V9fsPDU *pdu, V9fsPath *path)
   1881{
   1882    struct statfs stbuf;
   1883    int32_t iounit = 0;
   1884    V9fsState *s = pdu->s;
   1885
   1886    /*
   1887     * iounit should be multiples of f_bsize (host filesystem block size
   1888     * and as well as less than (client msize - P9_IOHDRSZ))
   1889     */
   1890    if (!v9fs_co_statfs(pdu, path, &stbuf)) {
   1891        if (stbuf.f_bsize) {
   1892            iounit = stbuf.f_bsize;
   1893            iounit *= (s->msize - P9_IOHDRSZ) / stbuf.f_bsize;
   1894        }
   1895    }
   1896    if (!iounit) {
   1897        iounit = s->msize - P9_IOHDRSZ;
   1898    }
   1899    return iounit;
   1900}
   1901
   1902static void coroutine_fn v9fs_open(void *opaque)
   1903{
   1904    int flags;
   1905    int32_t fid;
   1906    int32_t mode;
   1907    V9fsQID qid;
   1908    int iounit = 0;
   1909    ssize_t err = 0;
   1910    size_t offset = 7;
   1911    struct stat stbuf;
   1912    V9fsFidState *fidp;
   1913    V9fsPDU *pdu = opaque;
   1914    V9fsState *s = pdu->s;
   1915
   1916    if (s->proto_version == V9FS_PROTO_2000L) {
   1917        err = pdu_unmarshal(pdu, offset, "dd", &fid, &mode);
   1918    } else {
   1919        uint8_t modebyte;
   1920        err = pdu_unmarshal(pdu, offset, "db", &fid, &modebyte);
   1921        mode = modebyte;
   1922    }
   1923    if (err < 0) {
   1924        goto out_nofid;
   1925    }
   1926    trace_v9fs_open(pdu->tag, pdu->id, fid, mode);
   1927
   1928    fidp = get_fid(pdu, fid);
   1929    if (fidp == NULL) {
   1930        err = -ENOENT;
   1931        goto out_nofid;
   1932    }
   1933    if (fidp->fid_type != P9_FID_NONE) {
   1934        err = -EINVAL;
   1935        goto out;
   1936    }
   1937
   1938    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
   1939    if (err < 0) {
   1940        goto out;
   1941    }
   1942    err = stat_to_qid(pdu, &stbuf, &qid);
   1943    if (err < 0) {
   1944        goto out;
   1945    }
   1946    if (S_ISDIR(stbuf.st_mode)) {
   1947        err = v9fs_co_opendir(pdu, fidp);
   1948        if (err < 0) {
   1949            goto out;
   1950        }
   1951        fidp->fid_type = P9_FID_DIR;
   1952        err = pdu_marshal(pdu, offset, "Qd", &qid, 0);
   1953        if (err < 0) {
   1954            goto out;
   1955        }
   1956        err += offset;
   1957    } else {
   1958        if (s->proto_version == V9FS_PROTO_2000L) {
   1959            flags = get_dotl_openflags(s, mode);
   1960        } else {
   1961            flags = omode_to_uflags(mode);
   1962        }
   1963        if (is_ro_export(&s->ctx)) {
   1964            if (mode & O_WRONLY || mode & O_RDWR ||
   1965                mode & O_APPEND || mode & O_TRUNC) {
   1966                err = -EROFS;
   1967                goto out;
   1968            }
   1969        }
   1970        err = v9fs_co_open(pdu, fidp, flags);
   1971        if (err < 0) {
   1972            goto out;
   1973        }
   1974        fidp->fid_type = P9_FID_FILE;
   1975        fidp->open_flags = flags;
   1976        if (flags & O_EXCL) {
   1977            /*
   1978             * We let the host file system do O_EXCL check
   1979             * We should not reclaim such fd
   1980             */
   1981            fidp->flags |= FID_NON_RECLAIMABLE;
   1982        }
   1983        iounit = get_iounit(pdu, &fidp->path);
   1984        err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
   1985        if (err < 0) {
   1986            goto out;
   1987        }
   1988        err += offset;
   1989    }
   1990    trace_v9fs_open_return(pdu->tag, pdu->id,
   1991                           qid.type, qid.version, qid.path, iounit);
   1992out:
   1993    put_fid(pdu, fidp);
   1994out_nofid:
   1995    pdu_complete(pdu, err);
   1996}
   1997
   1998static void coroutine_fn v9fs_lcreate(void *opaque)
   1999{
   2000    int32_t dfid, flags, mode;
   2001    gid_t gid;
   2002    ssize_t err = 0;
   2003    ssize_t offset = 7;
   2004    V9fsString name;
   2005    V9fsFidState *fidp;
   2006    struct stat stbuf;
   2007    V9fsQID qid;
   2008    int32_t iounit;
   2009    V9fsPDU *pdu = opaque;
   2010
   2011    v9fs_string_init(&name);
   2012    err = pdu_unmarshal(pdu, offset, "dsddd", &dfid,
   2013                        &name, &flags, &mode, &gid);
   2014    if (err < 0) {
   2015        goto out_nofid;
   2016    }
   2017    trace_v9fs_lcreate(pdu->tag, pdu->id, dfid, flags, mode, gid);
   2018
   2019    if (name_is_illegal(name.data)) {
   2020        err = -ENOENT;
   2021        goto out_nofid;
   2022    }
   2023
   2024    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
   2025        err = -EEXIST;
   2026        goto out_nofid;
   2027    }
   2028
   2029    fidp = get_fid(pdu, dfid);
   2030    if (fidp == NULL) {
   2031        err = -ENOENT;
   2032        goto out_nofid;
   2033    }
   2034    if (fidp->fid_type != P9_FID_NONE) {
   2035        err = -EINVAL;
   2036        goto out;
   2037    }
   2038
   2039    flags = get_dotl_openflags(pdu->s, flags);
   2040    err = v9fs_co_open2(pdu, fidp, &name, gid,
   2041                        flags | O_CREAT, mode, &stbuf);
   2042    if (err < 0) {
   2043        goto out;
   2044    }
   2045    fidp->fid_type = P9_FID_FILE;
   2046    fidp->open_flags = flags;
   2047    if (flags & O_EXCL) {
   2048        /*
   2049         * We let the host file system do O_EXCL check
   2050         * We should not reclaim such fd
   2051         */
   2052        fidp->flags |= FID_NON_RECLAIMABLE;
   2053    }
   2054    iounit =  get_iounit(pdu, &fidp->path);
   2055    err = stat_to_qid(pdu, &stbuf, &qid);
   2056    if (err < 0) {
   2057        goto out;
   2058    }
   2059    err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
   2060    if (err < 0) {
   2061        goto out;
   2062    }
   2063    err += offset;
   2064    trace_v9fs_lcreate_return(pdu->tag, pdu->id,
   2065                              qid.type, qid.version, qid.path, iounit);
   2066out:
   2067    put_fid(pdu, fidp);
   2068out_nofid:
   2069    pdu_complete(pdu, err);
   2070    v9fs_string_free(&name);
   2071}
   2072
   2073static void coroutine_fn v9fs_fsync(void *opaque)
   2074{
   2075    int err;
   2076    int32_t fid;
   2077    int datasync;
   2078    size_t offset = 7;
   2079    V9fsFidState *fidp;
   2080    V9fsPDU *pdu = opaque;
   2081
   2082    err = pdu_unmarshal(pdu, offset, "dd", &fid, &datasync);
   2083    if (err < 0) {
   2084        goto out_nofid;
   2085    }
   2086    trace_v9fs_fsync(pdu->tag, pdu->id, fid, datasync);
   2087
   2088    fidp = get_fid(pdu, fid);
   2089    if (fidp == NULL) {
   2090        err = -ENOENT;
   2091        goto out_nofid;
   2092    }
   2093    err = v9fs_co_fsync(pdu, fidp, datasync);
   2094    if (!err) {
   2095        err = offset;
   2096    }
   2097    put_fid(pdu, fidp);
   2098out_nofid:
   2099    pdu_complete(pdu, err);
   2100}
   2101
   2102static void coroutine_fn v9fs_clunk(void *opaque)
   2103{
   2104    int err;
   2105    int32_t fid;
   2106    size_t offset = 7;
   2107    V9fsFidState *fidp;
   2108    V9fsPDU *pdu = opaque;
   2109    V9fsState *s = pdu->s;
   2110
   2111    err = pdu_unmarshal(pdu, offset, "d", &fid);
   2112    if (err < 0) {
   2113        goto out_nofid;
   2114    }
   2115    trace_v9fs_clunk(pdu->tag, pdu->id, fid);
   2116
   2117    fidp = clunk_fid(s, fid);
   2118    if (fidp == NULL) {
   2119        err = -ENOENT;
   2120        goto out_nofid;
   2121    }
   2122    /*
   2123     * Bump the ref so that put_fid will
   2124     * free the fid.
   2125     */
   2126    fidp->ref++;
   2127    err = put_fid(pdu, fidp);
   2128    if (!err) {
   2129        err = offset;
   2130    }
   2131out_nofid:
   2132    pdu_complete(pdu, err);
   2133}
   2134
   2135/*
   2136 * Create a QEMUIOVector for a sub-region of PDU iovecs
   2137 *
   2138 * @qiov:       uninitialized QEMUIOVector
   2139 * @skip:       number of bytes to skip from beginning of PDU
   2140 * @size:       number of bytes to include
   2141 * @is_write:   true - write, false - read
   2142 *
   2143 * The resulting QEMUIOVector has heap-allocated iovecs and must be cleaned up
   2144 * with qemu_iovec_destroy().
   2145 */
   2146static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu,
   2147                                    size_t skip, size_t size,
   2148                                    bool is_write)
   2149{
   2150    QEMUIOVector elem;
   2151    struct iovec *iov;
   2152    unsigned int niov;
   2153
   2154    if (is_write) {
   2155        pdu->s->transport->init_out_iov_from_pdu(pdu, &iov, &niov, size + skip);
   2156    } else {
   2157        pdu->s->transport->init_in_iov_from_pdu(pdu, &iov, &niov, size + skip);
   2158    }
   2159
   2160    qemu_iovec_init_external(&elem, iov, niov);
   2161    qemu_iovec_init(qiov, niov);
   2162    qemu_iovec_concat(qiov, &elem, skip, size);
   2163}
   2164
   2165static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
   2166                           uint64_t off, uint32_t max_count)
   2167{
   2168    ssize_t err;
   2169    size_t offset = 7;
   2170    uint64_t read_count;
   2171    QEMUIOVector qiov_full;
   2172
   2173    if (fidp->fs.xattr.len < off) {
   2174        read_count = 0;
   2175    } else {
   2176        read_count = fidp->fs.xattr.len - off;
   2177    }
   2178    if (read_count > max_count) {
   2179        read_count = max_count;
   2180    }
   2181    err = pdu_marshal(pdu, offset, "d", read_count);
   2182    if (err < 0) {
   2183        return err;
   2184    }
   2185    offset += err;
   2186
   2187    v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, read_count, false);
   2188    err = v9fs_pack(qiov_full.iov, qiov_full.niov, 0,
   2189                    ((char *)fidp->fs.xattr.value) + off,
   2190                    read_count);
   2191    qemu_iovec_destroy(&qiov_full);
   2192    if (err < 0) {
   2193        return err;
   2194    }
   2195    offset += err;
   2196    return offset;
   2197}
   2198
   2199static int coroutine_fn v9fs_do_readdir_with_stat(V9fsPDU *pdu,
   2200                                                  V9fsFidState *fidp,
   2201                                                  uint32_t max_count)
   2202{
   2203    V9fsPath path;
   2204    V9fsStat v9stat;
   2205    int len, err = 0;
   2206    int32_t count = 0;
   2207    struct stat stbuf;
   2208    off_t saved_dir_pos;
   2209    struct dirent *dent;
   2210
   2211    /* save the directory position */
   2212    saved_dir_pos = v9fs_co_telldir(pdu, fidp);
   2213    if (saved_dir_pos < 0) {
   2214        return saved_dir_pos;
   2215    }
   2216
   2217    while (1) {
   2218        v9fs_path_init(&path);
   2219
   2220        v9fs_readdir_lock(&fidp->fs.dir);
   2221
   2222        err = v9fs_co_readdir(pdu, fidp, &dent);
   2223        if (err || !dent) {
   2224            break;
   2225        }
   2226        err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path);
   2227        if (err < 0) {
   2228            break;
   2229        }
   2230        err = v9fs_co_lstat(pdu, &path, &stbuf);
   2231        if (err < 0) {
   2232            break;
   2233        }
   2234        err = stat_to_v9stat(pdu, &path, dent->d_name, &stbuf, &v9stat);
   2235        if (err < 0) {
   2236            break;
   2237        }
   2238        if ((count + v9stat.size + 2) > max_count) {
   2239            v9fs_readdir_unlock(&fidp->fs.dir);
   2240
   2241            /* Ran out of buffer. Set dir back to old position and return */
   2242            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
   2243            v9fs_stat_free(&v9stat);
   2244            v9fs_path_free(&path);
   2245            return count;
   2246        }
   2247
   2248        /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
   2249        len = pdu_marshal(pdu, 11 + count, "S", &v9stat);
   2250
   2251        v9fs_readdir_unlock(&fidp->fs.dir);
   2252
   2253        if (len < 0) {
   2254            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
   2255            v9fs_stat_free(&v9stat);
   2256            v9fs_path_free(&path);
   2257            return len;
   2258        }
   2259        count += len;
   2260        v9fs_stat_free(&v9stat);
   2261        v9fs_path_free(&path);
   2262        saved_dir_pos = dent->d_off;
   2263    }
   2264
   2265    v9fs_readdir_unlock(&fidp->fs.dir);
   2266
   2267    v9fs_path_free(&path);
   2268    if (err < 0) {
   2269        return err;
   2270    }
   2271    return count;
   2272}
   2273
   2274static void coroutine_fn v9fs_read(void *opaque)
   2275{
   2276    int32_t fid;
   2277    uint64_t off;
   2278    ssize_t err = 0;
   2279    int32_t count = 0;
   2280    size_t offset = 7;
   2281    uint32_t max_count;
   2282    V9fsFidState *fidp;
   2283    V9fsPDU *pdu = opaque;
   2284    V9fsState *s = pdu->s;
   2285
   2286    err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &max_count);
   2287    if (err < 0) {
   2288        goto out_nofid;
   2289    }
   2290    trace_v9fs_read(pdu->tag, pdu->id, fid, off, max_count);
   2291
   2292    fidp = get_fid(pdu, fid);
   2293    if (fidp == NULL) {
   2294        err = -EINVAL;
   2295        goto out_nofid;
   2296    }
   2297    if (fidp->fid_type == P9_FID_DIR) {
   2298        if (s->proto_version != V9FS_PROTO_2000U) {
   2299            warn_report_once(
   2300                "9p: bad client: T_read request on directory only expected "
   2301                "with 9P2000.u protocol version"
   2302            );
   2303            err = -EOPNOTSUPP;
   2304            goto out;
   2305        }
   2306        if (off == 0) {
   2307            v9fs_co_rewinddir(pdu, fidp);
   2308        }
   2309        count = v9fs_do_readdir_with_stat(pdu, fidp, max_count);
   2310        if (count < 0) {
   2311            err = count;
   2312            goto out;
   2313        }
   2314        err = pdu_marshal(pdu, offset, "d", count);
   2315        if (err < 0) {
   2316            goto out;
   2317        }
   2318        err += offset + count;
   2319    } else if (fidp->fid_type == P9_FID_FILE) {
   2320        QEMUIOVector qiov_full;
   2321        QEMUIOVector qiov;
   2322        int32_t len;
   2323
   2324        v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset + 4, max_count, false);
   2325        qemu_iovec_init(&qiov, qiov_full.niov);
   2326        do {
   2327            qemu_iovec_reset(&qiov);
   2328            qemu_iovec_concat(&qiov, &qiov_full, count, qiov_full.size - count);
   2329            if (0) {
   2330                print_sg(qiov.iov, qiov.niov);
   2331            }
   2332            /* Loop in case of EINTR */
   2333            do {
   2334                len = v9fs_co_preadv(pdu, fidp, qiov.iov, qiov.niov, off);
   2335                if (len >= 0) {
   2336                    off   += len;
   2337                    count += len;
   2338                }
   2339            } while (len == -EINTR && !pdu->cancelled);
   2340            if (len < 0) {
   2341                /* IO error return the error */
   2342                err = len;
   2343                goto out_free_iovec;
   2344            }
   2345        } while (count < max_count && len > 0);
   2346        err = pdu_marshal(pdu, offset, "d", count);
   2347        if (err < 0) {
   2348            goto out_free_iovec;
   2349        }
   2350        err += offset + count;
   2351out_free_iovec:
   2352        qemu_iovec_destroy(&qiov);
   2353        qemu_iovec_destroy(&qiov_full);
   2354    } else if (fidp->fid_type == P9_FID_XATTR) {
   2355        err = v9fs_xattr_read(s, pdu, fidp, off, max_count);
   2356    } else {
   2357        err = -EINVAL;
   2358    }
   2359    trace_v9fs_read_return(pdu->tag, pdu->id, count, err);
   2360out:
   2361    put_fid(pdu, fidp);
   2362out_nofid:
   2363    pdu_complete(pdu, err);
   2364}
   2365
   2366/**
   2367 * Returns size required in Rreaddir response for the passed dirent @p name.
   2368 *
   2369 * @param name - directory entry's name (i.e. file name, directory name)
   2370 * @returns required size in bytes
   2371 */
   2372size_t v9fs_readdir_response_size(V9fsString *name)
   2373{
   2374    /*
   2375     * Size of each dirent on the wire: size of qid (13) + size of offset (8)
   2376     * size of type (1) + size of name.size (2) + strlen(name.data)
   2377     */
   2378    return 24 + v9fs_string_size(name);
   2379}
   2380
   2381static void v9fs_free_dirents(struct V9fsDirEnt *e)
   2382{
   2383    struct V9fsDirEnt *next = NULL;
   2384
   2385    for (; e; e = next) {
   2386        next = e->next;
   2387        g_free(e->dent);
   2388        g_free(e->st);
   2389        g_free(e);
   2390    }
   2391}
   2392
   2393static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
   2394                                        off_t offset, int32_t max_count)
   2395{
   2396    size_t size;
   2397    V9fsQID qid;
   2398    V9fsString name;
   2399    int len, err = 0;
   2400    int32_t count = 0;
   2401    struct dirent *dent;
   2402    struct stat *st;
   2403    struct V9fsDirEnt *entries = NULL;
   2404
   2405    /*
   2406     * inode remapping requires the device id, which in turn might be
   2407     * different for different directory entries, so if inode remapping is
   2408     * enabled we have to make a full stat for each directory entry
   2409     */
   2410    const bool dostat = pdu->s->ctx.export_flags & V9FS_REMAP_INODES;
   2411
   2412    /*
   2413     * Fetch all required directory entries altogether on a background IO
   2414     * thread from fs driver. We don't want to do that for each entry
   2415     * individually, because hopping between threads (this main IO thread
   2416     * and background IO driver thread) would sum up to huge latencies.
   2417     */
   2418    count = v9fs_co_readdir_many(pdu, fidp, &entries, offset, max_count,
   2419                                 dostat);
   2420    if (count < 0) {
   2421        err = count;
   2422        count = 0;
   2423        goto out;
   2424    }
   2425    count = 0;
   2426
   2427    for (struct V9fsDirEnt *e = entries; e; e = e->next) {
   2428        dent = e->dent;
   2429
   2430        if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
   2431            st = e->st;
   2432            /* e->st should never be NULL, but just to be sure */
   2433            if (!st) {
   2434                err = -1;
   2435                break;
   2436            }
   2437
   2438            /* remap inode */
   2439            err = stat_to_qid(pdu, st, &qid);
   2440            if (err < 0) {
   2441                break;
   2442            }
   2443        } else {
   2444            /*
   2445             * Fill up just the path field of qid because the client uses
   2446             * only that. To fill the entire qid structure we will have
   2447             * to stat each dirent found, which is expensive. For the
   2448             * latter reason we don't call stat_to_qid() here. Only drawback
   2449             * is that no multi-device export detection of stat_to_qid()
   2450             * would be done and provided as error to the user here. But
   2451             * user would get that error anyway when accessing those
   2452             * files/dirs through other ways.
   2453             */
   2454            size = MIN(sizeof(dent->d_ino), sizeof(qid.path));
   2455            memcpy(&qid.path, &dent->d_ino, size);
   2456            /* Fill the other fields with dummy values */
   2457            qid.type = 0;
   2458            qid.version = 0;
   2459        }
   2460
   2461        v9fs_string_init(&name);
   2462        v9fs_string_sprintf(&name, "%s", dent->d_name);
   2463
   2464        /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
   2465        len = pdu_marshal(pdu, 11 + count, "Qqbs",
   2466                          &qid, dent->d_off,
   2467                          dent->d_type, &name);
   2468
   2469        v9fs_string_free(&name);
   2470
   2471        if (len < 0) {
   2472            err = len;
   2473            break;
   2474        }
   2475
   2476        count += len;
   2477    }
   2478
   2479out:
   2480    v9fs_free_dirents(entries);
   2481    if (err < 0) {
   2482        return err;
   2483    }
   2484    return count;
   2485}
   2486
   2487static void coroutine_fn v9fs_readdir(void *opaque)
   2488{
   2489    int32_t fid;
   2490    V9fsFidState *fidp;
   2491    ssize_t retval = 0;
   2492    size_t offset = 7;
   2493    uint64_t initial_offset;
   2494    int32_t count;
   2495    uint32_t max_count;
   2496    V9fsPDU *pdu = opaque;
   2497    V9fsState *s = pdu->s;
   2498
   2499    retval = pdu_unmarshal(pdu, offset, "dqd", &fid,
   2500                           &initial_offset, &max_count);
   2501    if (retval < 0) {
   2502        goto out_nofid;
   2503    }
   2504    trace_v9fs_readdir(pdu->tag, pdu->id, fid, initial_offset, max_count);
   2505
   2506    /* Enough space for a R_readdir header: size[4] Rreaddir tag[2] count[4] */
   2507    if (max_count > s->msize - 11) {
   2508        max_count = s->msize - 11;
   2509        warn_report_once(
   2510            "9p: bad client: T_readdir with count > msize - 11"
   2511        );
   2512    }
   2513
   2514    fidp = get_fid(pdu, fid);
   2515    if (fidp == NULL) {
   2516        retval = -EINVAL;
   2517        goto out_nofid;
   2518    }
   2519    if (!fidp->fs.dir.stream) {
   2520        retval = -EINVAL;
   2521        goto out;
   2522    }
   2523    if (s->proto_version != V9FS_PROTO_2000L) {
   2524        warn_report_once(
   2525            "9p: bad client: T_readdir request only expected with 9P2000.L "
   2526            "protocol version"
   2527        );
   2528        retval = -EOPNOTSUPP;
   2529        goto out;
   2530    }
   2531    count = v9fs_do_readdir(pdu, fidp, (off_t) initial_offset, max_count);
   2532    if (count < 0) {
   2533        retval = count;
   2534        goto out;
   2535    }
   2536    retval = pdu_marshal(pdu, offset, "d", count);
   2537    if (retval < 0) {
   2538        goto out;
   2539    }
   2540    retval += count + offset;
   2541    trace_v9fs_readdir_return(pdu->tag, pdu->id, count, retval);
   2542out:
   2543    put_fid(pdu, fidp);
   2544out_nofid:
   2545    pdu_complete(pdu, retval);
   2546}
   2547
   2548static int v9fs_xattr_write(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
   2549                            uint64_t off, uint32_t count,
   2550                            struct iovec *sg, int cnt)
   2551{
   2552    int i, to_copy;
   2553    ssize_t err = 0;
   2554    uint64_t write_count;
   2555    size_t offset = 7;
   2556
   2557
   2558    if (fidp->fs.xattr.len < off) {
   2559        return -ENOSPC;
   2560    }
   2561    write_count = fidp->fs.xattr.len - off;
   2562    if (write_count > count) {
   2563        write_count = count;
   2564    }
   2565    err = pdu_marshal(pdu, offset, "d", write_count);
   2566    if (err < 0) {
   2567        return err;
   2568    }
   2569    err += offset;
   2570    fidp->fs.xattr.copied_len += write_count;
   2571    /*
   2572     * Now copy the content from sg list
   2573     */
   2574    for (i = 0; i < cnt; i++) {
   2575        if (write_count > sg[i].iov_len) {
   2576            to_copy = sg[i].iov_len;
   2577        } else {
   2578            to_copy = write_count;
   2579        }
   2580        memcpy((char *)fidp->fs.xattr.value + off, sg[i].iov_base, to_copy);
   2581        /* updating vs->off since we are not using below */
   2582        off += to_copy;
   2583        write_count -= to_copy;
   2584    }
   2585
   2586    return err;
   2587}
   2588
   2589static void coroutine_fn v9fs_write(void *opaque)
   2590{
   2591    ssize_t err;
   2592    int32_t fid;
   2593    uint64_t off;
   2594    uint32_t count;
   2595    int32_t len = 0;
   2596    int32_t total = 0;
   2597    size_t offset = 7;
   2598    V9fsFidState *fidp;
   2599    V9fsPDU *pdu = opaque;
   2600    V9fsState *s = pdu->s;
   2601    QEMUIOVector qiov_full;
   2602    QEMUIOVector qiov;
   2603
   2604    err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &count);
   2605    if (err < 0) {
   2606        pdu_complete(pdu, err);
   2607        return;
   2608    }
   2609    offset += err;
   2610    v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, count, true);
   2611    trace_v9fs_write(pdu->tag, pdu->id, fid, off, count, qiov_full.niov);
   2612
   2613    fidp = get_fid(pdu, fid);
   2614    if (fidp == NULL) {
   2615        err = -EINVAL;
   2616        goto out_nofid;
   2617    }
   2618    if (fidp->fid_type == P9_FID_FILE) {
   2619        if (fidp->fs.fd == -1) {
   2620            err = -EINVAL;
   2621            goto out;
   2622        }
   2623    } else if (fidp->fid_type == P9_FID_XATTR) {
   2624        /*
   2625         * setxattr operation
   2626         */
   2627        err = v9fs_xattr_write(s, pdu, fidp, off, count,
   2628                               qiov_full.iov, qiov_full.niov);
   2629        goto out;
   2630    } else {
   2631        err = -EINVAL;
   2632        goto out;
   2633    }
   2634    qemu_iovec_init(&qiov, qiov_full.niov);
   2635    do {
   2636        qemu_iovec_reset(&qiov);
   2637        qemu_iovec_concat(&qiov, &qiov_full, total, qiov_full.size - total);
   2638        if (0) {
   2639            print_sg(qiov.iov, qiov.niov);
   2640        }
   2641        /* Loop in case of EINTR */
   2642        do {
   2643            len = v9fs_co_pwritev(pdu, fidp, qiov.iov, qiov.niov, off);
   2644            if (len >= 0) {
   2645                off   += len;
   2646                total += len;
   2647            }
   2648        } while (len == -EINTR && !pdu->cancelled);
   2649        if (len < 0) {
   2650            /* IO error return the error */
   2651            err = len;
   2652            goto out_qiov;
   2653        }
   2654    } while (total < count && len > 0);
   2655
   2656    offset = 7;
   2657    err = pdu_marshal(pdu, offset, "d", total);
   2658    if (err < 0) {
   2659        goto out_qiov;
   2660    }
   2661    err += offset;
   2662    trace_v9fs_write_return(pdu->tag, pdu->id, total, err);
   2663out_qiov:
   2664    qemu_iovec_destroy(&qiov);
   2665out:
   2666    put_fid(pdu, fidp);
   2667out_nofid:
   2668    qemu_iovec_destroy(&qiov_full);
   2669    pdu_complete(pdu, err);
   2670}
   2671
   2672static void coroutine_fn v9fs_create(void *opaque)
   2673{
   2674    int32_t fid;
   2675    int err = 0;
   2676    size_t offset = 7;
   2677    V9fsFidState *fidp;
   2678    V9fsQID qid;
   2679    int32_t perm;
   2680    int8_t mode;
   2681    V9fsPath path;
   2682    struct stat stbuf;
   2683    V9fsString name;
   2684    V9fsString extension;
   2685    int iounit;
   2686    V9fsPDU *pdu = opaque;
   2687    V9fsState *s = pdu->s;
   2688
   2689    v9fs_path_init(&path);
   2690    v9fs_string_init(&name);
   2691    v9fs_string_init(&extension);
   2692    err = pdu_unmarshal(pdu, offset, "dsdbs", &fid, &name,
   2693                        &perm, &mode, &extension);
   2694    if (err < 0) {
   2695        goto out_nofid;
   2696    }
   2697    trace_v9fs_create(pdu->tag, pdu->id, fid, name.data, perm, mode);
   2698
   2699    if (name_is_illegal(name.data)) {
   2700        err = -ENOENT;
   2701        goto out_nofid;
   2702    }
   2703
   2704    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
   2705        err = -EEXIST;
   2706        goto out_nofid;
   2707    }
   2708
   2709    fidp = get_fid(pdu, fid);
   2710    if (fidp == NULL) {
   2711        err = -EINVAL;
   2712        goto out_nofid;
   2713    }
   2714    if (fidp->fid_type != P9_FID_NONE) {
   2715        err = -EINVAL;
   2716        goto out;
   2717    }
   2718    if (perm & P9_STAT_MODE_DIR) {
   2719        err = v9fs_co_mkdir(pdu, fidp, &name, perm & 0777,
   2720                            fidp->uid, -1, &stbuf);
   2721        if (err < 0) {
   2722            goto out;
   2723        }
   2724        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
   2725        if (err < 0) {
   2726            goto out;
   2727        }
   2728        v9fs_path_write_lock(s);
   2729        v9fs_path_copy(&fidp->path, &path);
   2730        v9fs_path_unlock(s);
   2731        err = v9fs_co_opendir(pdu, fidp);
   2732        if (err < 0) {
   2733            goto out;
   2734        }
   2735        fidp->fid_type = P9_FID_DIR;
   2736    } else if (perm & P9_STAT_MODE_SYMLINK) {
   2737        err = v9fs_co_symlink(pdu, fidp, &name,
   2738                              extension.data, -1 , &stbuf);
   2739        if (err < 0) {
   2740            goto out;
   2741        }
   2742        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
   2743        if (err < 0) {
   2744            goto out;
   2745        }
   2746        v9fs_path_write_lock(s);
   2747        v9fs_path_copy(&fidp->path, &path);
   2748        v9fs_path_unlock(s);
   2749    } else if (perm & P9_STAT_MODE_LINK) {
   2750        int32_t ofid = atoi(extension.data);
   2751        V9fsFidState *ofidp = get_fid(pdu, ofid);
   2752        if (ofidp == NULL) {
   2753            err = -EINVAL;
   2754            goto out;
   2755        }
   2756        err = v9fs_co_link(pdu, ofidp, fidp, &name);
   2757        put_fid(pdu, ofidp);
   2758        if (err < 0) {
   2759            goto out;
   2760        }
   2761        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
   2762        if (err < 0) {
   2763            fidp->fid_type = P9_FID_NONE;
   2764            goto out;
   2765        }
   2766        v9fs_path_write_lock(s);
   2767        v9fs_path_copy(&fidp->path, &path);
   2768        v9fs_path_unlock(s);
   2769        err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
   2770        if (err < 0) {
   2771            fidp->fid_type = P9_FID_NONE;
   2772            goto out;
   2773        }
   2774    } else if (perm & P9_STAT_MODE_DEVICE) {
   2775        char ctype;
   2776        uint32_t major, minor;
   2777        mode_t nmode = 0;
   2778
   2779        if (sscanf(extension.data, "%c %u %u", &ctype, &major, &minor) != 3) {
   2780            err = -errno;
   2781            goto out;
   2782        }
   2783
   2784        switch (ctype) {
   2785        case 'c':
   2786            nmode = S_IFCHR;
   2787            break;
   2788        case 'b':
   2789            nmode = S_IFBLK;
   2790            break;
   2791        default:
   2792            err = -EIO;
   2793            goto out;
   2794        }
   2795
   2796        nmode |= perm & 0777;
   2797        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
   2798                            makedev(major, minor), nmode, &stbuf);
   2799        if (err < 0) {
   2800            goto out;
   2801        }
   2802        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
   2803        if (err < 0) {
   2804            goto out;
   2805        }
   2806        v9fs_path_write_lock(s);
   2807        v9fs_path_copy(&fidp->path, &path);
   2808        v9fs_path_unlock(s);
   2809    } else if (perm & P9_STAT_MODE_NAMED_PIPE) {
   2810        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
   2811                            0, S_IFIFO | (perm & 0777), &stbuf);
   2812        if (err < 0) {
   2813            goto out;
   2814        }
   2815        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
   2816        if (err < 0) {
   2817            goto out;
   2818        }
   2819        v9fs_path_write_lock(s);
   2820        v9fs_path_copy(&fidp->path, &path);
   2821        v9fs_path_unlock(s);
   2822    } else if (perm & P9_STAT_MODE_SOCKET) {
   2823        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
   2824                            0, S_IFSOCK | (perm & 0777), &stbuf);
   2825        if (err < 0) {
   2826            goto out;
   2827        }
   2828        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
   2829        if (err < 0) {
   2830            goto out;
   2831        }
   2832        v9fs_path_write_lock(s);
   2833        v9fs_path_copy(&fidp->path, &path);
   2834        v9fs_path_unlock(s);
   2835    } else {
   2836        err = v9fs_co_open2(pdu, fidp, &name, -1,
   2837                            omode_to_uflags(mode) | O_CREAT, perm, &stbuf);
   2838        if (err < 0) {
   2839            goto out;
   2840        }
   2841        fidp->fid_type = P9_FID_FILE;
   2842        fidp->open_flags = omode_to_uflags(mode);
   2843        if (fidp->open_flags & O_EXCL) {
   2844            /*
   2845             * We let the host file system do O_EXCL check
   2846             * We should not reclaim such fd
   2847             */
   2848            fidp->flags |= FID_NON_RECLAIMABLE;
   2849        }
   2850    }
   2851    iounit = get_iounit(pdu, &fidp->path);
   2852    err = stat_to_qid(pdu, &stbuf, &qid);
   2853    if (err < 0) {
   2854        goto out;
   2855    }
   2856    err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
   2857    if (err < 0) {
   2858        goto out;
   2859    }
   2860    err += offset;
   2861    trace_v9fs_create_return(pdu->tag, pdu->id,
   2862                             qid.type, qid.version, qid.path, iounit);
   2863out:
   2864    put_fid(pdu, fidp);
   2865out_nofid:
   2866   pdu_complete(pdu, err);
   2867   v9fs_string_free(&name);
   2868   v9fs_string_free(&extension);
   2869   v9fs_path_free(&path);
   2870}
   2871
   2872static void coroutine_fn v9fs_symlink(void *opaque)
   2873{
   2874    V9fsPDU *pdu = opaque;
   2875    V9fsString name;
   2876    V9fsString symname;
   2877    V9fsFidState *dfidp;
   2878    V9fsQID qid;
   2879    struct stat stbuf;
   2880    int32_t dfid;
   2881    int err = 0;
   2882    gid_t gid;
   2883    size_t offset = 7;
   2884
   2885    v9fs_string_init(&name);
   2886    v9fs_string_init(&symname);
   2887    err = pdu_unmarshal(pdu, offset, "dssd", &dfid, &name, &symname, &gid);
   2888    if (err < 0) {
   2889        goto out_nofid;
   2890    }
   2891    trace_v9fs_symlink(pdu->tag, pdu->id, dfid, name.data, symname.data, gid);
   2892
   2893    if (name_is_illegal(name.data)) {
   2894        err = -ENOENT;
   2895        goto out_nofid;
   2896    }
   2897
   2898    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
   2899        err = -EEXIST;
   2900        goto out_nofid;
   2901    }
   2902
   2903    dfidp = get_fid(pdu, dfid);
   2904    if (dfidp == NULL) {
   2905        err = -EINVAL;
   2906        goto out_nofid;
   2907    }
   2908    err = v9fs_co_symlink(pdu, dfidp, &name, symname.data, gid, &stbuf);
   2909    if (err < 0) {
   2910        goto out;
   2911    }
   2912    err = stat_to_qid(pdu, &stbuf, &qid);
   2913    if (err < 0) {
   2914        goto out;
   2915    }
   2916    err =  pdu_marshal(pdu, offset, "Q", &qid);
   2917    if (err < 0) {
   2918        goto out;
   2919    }
   2920    err += offset;
   2921    trace_v9fs_symlink_return(pdu->tag, pdu->id,
   2922                              qid.type, qid.version, qid.path);
   2923out:
   2924    put_fid(pdu, dfidp);
   2925out_nofid:
   2926    pdu_complete(pdu, err);
   2927    v9fs_string_free(&name);
   2928    v9fs_string_free(&symname);
   2929}
   2930
   2931static void coroutine_fn v9fs_flush(void *opaque)
   2932{
   2933    ssize_t err;
   2934    int16_t tag;
   2935    size_t offset = 7;
   2936    V9fsPDU *cancel_pdu = NULL;
   2937    V9fsPDU *pdu = opaque;
   2938    V9fsState *s = pdu->s;
   2939
   2940    err = pdu_unmarshal(pdu, offset, "w", &tag);
   2941    if (err < 0) {
   2942        pdu_complete(pdu, err);
   2943        return;
   2944    }
   2945    trace_v9fs_flush(pdu->tag, pdu->id, tag);
   2946
   2947    if (pdu->tag == tag) {
   2948        warn_report("the guest sent a self-referencing 9P flush request");
   2949    } else {
   2950        QLIST_FOREACH(cancel_pdu, &s->active_list, next) {
   2951            if (cancel_pdu->tag == tag) {
   2952                break;
   2953            }
   2954        }
   2955    }
   2956    if (cancel_pdu) {
   2957        cancel_pdu->cancelled = 1;
   2958        /*
   2959         * Wait for pdu to complete.
   2960         */
   2961        qemu_co_queue_wait(&cancel_pdu->complete, NULL);
   2962        if (!qemu_co_queue_next(&cancel_pdu->complete)) {
   2963            cancel_pdu->cancelled = 0;
   2964            pdu_free(cancel_pdu);
   2965        }
   2966    }
   2967    pdu_complete(pdu, 7);
   2968}
   2969
   2970static void coroutine_fn v9fs_link(void *opaque)
   2971{
   2972    V9fsPDU *pdu = opaque;
   2973    int32_t dfid, oldfid;
   2974    V9fsFidState *dfidp, *oldfidp;
   2975    V9fsString name;
   2976    size_t offset = 7;
   2977    int err = 0;
   2978
   2979    v9fs_string_init(&name);
   2980    err = pdu_unmarshal(pdu, offset, "dds", &dfid, &oldfid, &name);
   2981    if (err < 0) {
   2982        goto out_nofid;
   2983    }
   2984    trace_v9fs_link(pdu->tag, pdu->id, dfid, oldfid, name.data);
   2985
   2986    if (name_is_illegal(name.data)) {
   2987        err = -ENOENT;
   2988        goto out_nofid;
   2989    }
   2990
   2991    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
   2992        err = -EEXIST;
   2993        goto out_nofid;
   2994    }
   2995
   2996    dfidp = get_fid(pdu, dfid);
   2997    if (dfidp == NULL) {
   2998        err = -ENOENT;
   2999        goto out_nofid;
   3000    }
   3001
   3002    oldfidp = get_fid(pdu, oldfid);
   3003    if (oldfidp == NULL) {
   3004        err = -ENOENT;
   3005        goto out;
   3006    }
   3007    err = v9fs_co_link(pdu, oldfidp, dfidp, &name);
   3008    if (!err) {
   3009        err = offset;
   3010    }
   3011    put_fid(pdu, oldfidp);
   3012out:
   3013    put_fid(pdu, dfidp);
   3014out_nofid:
   3015    v9fs_string_free(&name);
   3016    pdu_complete(pdu, err);
   3017}
   3018
   3019/* Only works with path name based fid */
   3020static void coroutine_fn v9fs_remove(void *opaque)
   3021{
   3022    int32_t fid;
   3023    int err = 0;
   3024    size_t offset = 7;
   3025    V9fsFidState *fidp;
   3026    V9fsPDU *pdu = opaque;
   3027
   3028    err = pdu_unmarshal(pdu, offset, "d", &fid);
   3029    if (err < 0) {
   3030        goto out_nofid;
   3031    }
   3032    trace_v9fs_remove(pdu->tag, pdu->id, fid);
   3033
   3034    fidp = get_fid(pdu, fid);
   3035    if (fidp == NULL) {
   3036        err = -EINVAL;
   3037        goto out_nofid;
   3038    }
   3039    /* if fs driver is not path based, return EOPNOTSUPP */
   3040    if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
   3041        err = -EOPNOTSUPP;
   3042        goto out_err;
   3043    }
   3044    /*
   3045     * IF the file is unlinked, we cannot reopen
   3046     * the file later. So don't reclaim fd
   3047     */
   3048    err = v9fs_mark_fids_unreclaim(pdu, &fidp->path);
   3049    if (err < 0) {
   3050        goto out_err;
   3051    }
   3052    err = v9fs_co_remove(pdu, &fidp->path);
   3053    if (!err) {
   3054        err = offset;
   3055    }
   3056out_err:
   3057    /* For TREMOVE we need to clunk the fid even on failed remove */
   3058    clunk_fid(pdu->s, fidp->fid);
   3059    put_fid(pdu, fidp);
   3060out_nofid:
   3061    pdu_complete(pdu, err);
   3062}
   3063
   3064static void coroutine_fn v9fs_unlinkat(void *opaque)
   3065{
   3066    int err = 0;
   3067    V9fsString name;
   3068    int32_t dfid, flags, rflags = 0;
   3069    size_t offset = 7;
   3070    V9fsPath path;
   3071    V9fsFidState *dfidp;
   3072    V9fsPDU *pdu = opaque;
   3073
   3074    v9fs_string_init(&name);
   3075    err = pdu_unmarshal(pdu, offset, "dsd", &dfid, &name, &flags);
   3076    if (err < 0) {
   3077        goto out_nofid;
   3078    }
   3079
   3080    if (name_is_illegal(name.data)) {
   3081        err = -ENOENT;
   3082        goto out_nofid;
   3083    }
   3084
   3085    if (!strcmp(".", name.data)) {
   3086        err = -EINVAL;
   3087        goto out_nofid;
   3088    }
   3089
   3090    if (!strcmp("..", name.data)) {
   3091        err = -ENOTEMPTY;
   3092        goto out_nofid;
   3093    }
   3094
   3095    if (flags & ~P9_DOTL_AT_REMOVEDIR) {
   3096        err = -EINVAL;
   3097        goto out_nofid;
   3098    }
   3099
   3100    if (flags & P9_DOTL_AT_REMOVEDIR) {
   3101        rflags |= AT_REMOVEDIR;
   3102    }
   3103
   3104    dfidp = get_fid(pdu, dfid);
   3105    if (dfidp == NULL) {
   3106        err = -EINVAL;
   3107        goto out_nofid;
   3108    }
   3109    /*
   3110     * IF the file is unlinked, we cannot reopen
   3111     * the file later. So don't reclaim fd
   3112     */
   3113    v9fs_path_init(&path);
   3114    err = v9fs_co_name_to_path(pdu, &dfidp->path, name.data, &path);
   3115    if (err < 0) {
   3116        goto out_err;
   3117    }
   3118    err = v9fs_mark_fids_unreclaim(pdu, &path);
   3119    if (err < 0) {
   3120        goto out_err;
   3121    }
   3122    err = v9fs_co_unlinkat(pdu, &dfidp->path, &name, rflags);
   3123    if (!err) {
   3124        err = offset;
   3125    }
   3126out_err:
   3127    put_fid(pdu, dfidp);
   3128    v9fs_path_free(&path);
   3129out_nofid:
   3130    pdu_complete(pdu, err);
   3131    v9fs_string_free(&name);
   3132}
   3133
   3134
   3135/* Only works with path name based fid */
   3136static int coroutine_fn v9fs_complete_rename(V9fsPDU *pdu, V9fsFidState *fidp,
   3137                                             int32_t newdirfid,
   3138                                             V9fsString *name)
   3139{
   3140    int err = 0;
   3141    V9fsPath new_path;
   3142    V9fsFidState *tfidp;
   3143    V9fsState *s = pdu->s;
   3144    V9fsFidState *dirfidp = NULL;
   3145
   3146    v9fs_path_init(&new_path);
   3147    if (newdirfid != -1) {
   3148        dirfidp = get_fid(pdu, newdirfid);
   3149        if (dirfidp == NULL) {
   3150            return -ENOENT;
   3151        }
   3152        if (fidp->fid_type != P9_FID_NONE) {
   3153            err = -EINVAL;
   3154            goto out;
   3155        }
   3156        err = v9fs_co_name_to_path(pdu, &dirfidp->path, name->data, &new_path);
   3157        if (err < 0) {
   3158            goto out;
   3159        }
   3160    } else {
   3161        char *dir_name = g_path_get_dirname(fidp->path.data);
   3162        V9fsPath dir_path;
   3163
   3164        v9fs_path_init(&dir_path);
   3165        v9fs_path_sprintf(&dir_path, "%s", dir_name);
   3166        g_free(dir_name);
   3167
   3168        err = v9fs_co_name_to_path(pdu, &dir_path, name->data, &new_path);
   3169        v9fs_path_free(&dir_path);
   3170        if (err < 0) {
   3171            goto out;
   3172        }
   3173    }
   3174    err = v9fs_co_rename(pdu, &fidp->path, &new_path);
   3175    if (err < 0) {
   3176        goto out;
   3177    }
   3178    /*
   3179     * Fixup fid's pointing to the old name to
   3180     * start pointing to the new name
   3181     */
   3182    QSIMPLEQ_FOREACH(tfidp, &s->fid_list, next) {
   3183        if (v9fs_path_is_ancestor(&fidp->path, &tfidp->path)) {
   3184            /* replace the name */
   3185            v9fs_fix_path(&tfidp->path, &new_path, strlen(fidp->path.data));
   3186        }
   3187    }
   3188out:
   3189    if (dirfidp) {
   3190        put_fid(pdu, dirfidp);
   3191    }
   3192    v9fs_path_free(&new_path);
   3193    return err;
   3194}
   3195
   3196/* Only works with path name based fid */
   3197static void coroutine_fn v9fs_rename(void *opaque)
   3198{
   3199    int32_t fid;
   3200    ssize_t err = 0;
   3201    size_t offset = 7;
   3202    V9fsString name;
   3203    int32_t newdirfid;
   3204    V9fsFidState *fidp;
   3205    V9fsPDU *pdu = opaque;
   3206    V9fsState *s = pdu->s;
   3207
   3208    v9fs_string_init(&name);
   3209    err = pdu_unmarshal(pdu, offset, "dds", &fid, &newdirfid, &name);
   3210    if (err < 0) {
   3211        goto out_nofid;
   3212    }
   3213
   3214    if (name_is_illegal(name.data)) {
   3215        err = -ENOENT;
   3216        goto out_nofid;
   3217    }
   3218
   3219    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
   3220        err = -EISDIR;
   3221        goto out_nofid;
   3222    }
   3223
   3224    fidp = get_fid(pdu, fid);
   3225    if (fidp == NULL) {
   3226        err = -ENOENT;
   3227        goto out_nofid;
   3228    }
   3229    if (fidp->fid_type != P9_FID_NONE) {
   3230        err = -EINVAL;
   3231        goto out;
   3232    }
   3233    /* if fs driver is not path based, return EOPNOTSUPP */
   3234    if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
   3235        err = -EOPNOTSUPP;
   3236        goto out;
   3237    }
   3238    v9fs_path_write_lock(s);
   3239    err = v9fs_complete_rename(pdu, fidp, newdirfid, &name);
   3240    v9fs_path_unlock(s);
   3241    if (!err) {
   3242        err = offset;
   3243    }
   3244out:
   3245    put_fid(pdu, fidp);
   3246out_nofid:
   3247    pdu_complete(pdu, err);
   3248    v9fs_string_free(&name);
   3249}
   3250
   3251static int coroutine_fn v9fs_fix_fid_paths(V9fsPDU *pdu, V9fsPath *olddir,
   3252                                           V9fsString *old_name,
   3253                                           V9fsPath *newdir,
   3254                                           V9fsString *new_name)
   3255{
   3256    V9fsFidState *tfidp;
   3257    V9fsPath oldpath, newpath;
   3258    V9fsState *s = pdu->s;
   3259    int err;
   3260
   3261    v9fs_path_init(&oldpath);
   3262    v9fs_path_init(&newpath);
   3263    err = v9fs_co_name_to_path(pdu, olddir, old_name->data, &oldpath);
   3264    if (err < 0) {
   3265        goto out;
   3266    }
   3267    err = v9fs_co_name_to_path(pdu, newdir, new_name->data, &newpath);
   3268    if (err < 0) {
   3269        goto out;
   3270    }
   3271
   3272    /*
   3273     * Fixup fid's pointing to the old name to
   3274     * start pointing to the new name
   3275     */
   3276    QSIMPLEQ_FOREACH(tfidp, &s->fid_list, next) {
   3277        if (v9fs_path_is_ancestor(&oldpath, &tfidp->path)) {
   3278            /* replace the name */
   3279            v9fs_fix_path(&tfidp->path, &newpath, strlen(oldpath.data));
   3280        }
   3281    }
   3282out:
   3283    v9fs_path_free(&oldpath);
   3284    v9fs_path_free(&newpath);
   3285    return err;
   3286}
   3287
   3288static int coroutine_fn v9fs_complete_renameat(V9fsPDU *pdu, int32_t olddirfid,
   3289                                               V9fsString *old_name,
   3290                                               int32_t newdirfid,
   3291                                               V9fsString *new_name)
   3292{
   3293    int err = 0;
   3294    V9fsState *s = pdu->s;
   3295    V9fsFidState *newdirfidp = NULL, *olddirfidp = NULL;
   3296
   3297    olddirfidp = get_fid(pdu, olddirfid);
   3298    if (olddirfidp == NULL) {
   3299        err = -ENOENT;
   3300        goto out;
   3301    }
   3302    if (newdirfid != -1) {
   3303        newdirfidp = get_fid(pdu, newdirfid);
   3304        if (newdirfidp == NULL) {
   3305            err = -ENOENT;
   3306            goto out;
   3307        }
   3308    } else {
   3309        newdirfidp = get_fid(pdu, olddirfid);
   3310    }
   3311
   3312    err = v9fs_co_renameat(pdu, &olddirfidp->path, old_name,
   3313                           &newdirfidp->path, new_name);
   3314    if (err < 0) {
   3315        goto out;
   3316    }
   3317    if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) {
   3318        /* Only for path based fid  we need to do the below fixup */
   3319        err = v9fs_fix_fid_paths(pdu, &olddirfidp->path, old_name,
   3320                                 &newdirfidp->path, new_name);
   3321    }
   3322out:
   3323    if (olddirfidp) {
   3324        put_fid(pdu, olddirfidp);
   3325    }
   3326    if (newdirfidp) {
   3327        put_fid(pdu, newdirfidp);
   3328    }
   3329    return err;
   3330}
   3331
   3332static void coroutine_fn v9fs_renameat(void *opaque)
   3333{
   3334    ssize_t err = 0;
   3335    size_t offset = 7;
   3336    V9fsPDU *pdu = opaque;
   3337    V9fsState *s = pdu->s;
   3338    int32_t olddirfid, newdirfid;
   3339    V9fsString old_name, new_name;
   3340
   3341    v9fs_string_init(&old_name);
   3342    v9fs_string_init(&new_name);
   3343    err = pdu_unmarshal(pdu, offset, "dsds", &olddirfid,
   3344                        &old_name, &newdirfid, &new_name);
   3345    if (err < 0) {
   3346        goto out_err;
   3347    }
   3348
   3349    if (name_is_illegal(old_name.data) || name_is_illegal(new_name.data)) {
   3350        err = -ENOENT;
   3351        goto out_err;
   3352    }
   3353
   3354    if (!strcmp(".", old_name.data) || !strcmp("..", old_name.data) ||
   3355        !strcmp(".", new_name.data) || !strcmp("..", new_name.data)) {
   3356        err = -EISDIR;
   3357        goto out_err;
   3358    }
   3359
   3360    v9fs_path_write_lock(s);
   3361    err = v9fs_complete_renameat(pdu, olddirfid,
   3362                                 &old_name, newdirfid, &new_name);
   3363    v9fs_path_unlock(s);
   3364    if (!err) {
   3365        err = offset;
   3366    }
   3367
   3368out_err:
   3369    pdu_complete(pdu, err);
   3370    v9fs_string_free(&old_name);
   3371    v9fs_string_free(&new_name);
   3372}
   3373
   3374static void coroutine_fn v9fs_wstat(void *opaque)
   3375{
   3376    int32_t fid;
   3377    int err = 0;
   3378    int16_t unused;
   3379    V9fsStat v9stat;
   3380    size_t offset = 7;
   3381    struct stat stbuf;
   3382    V9fsFidState *fidp;
   3383    V9fsPDU *pdu = opaque;
   3384    V9fsState *s = pdu->s;
   3385
   3386    v9fs_stat_init(&v9stat);
   3387    err = pdu_unmarshal(pdu, offset, "dwS", &fid, &unused, &v9stat);
   3388    if (err < 0) {
   3389        goto out_nofid;
   3390    }
   3391    trace_v9fs_wstat(pdu->tag, pdu->id, fid,
   3392                     v9stat.mode, v9stat.atime, v9stat.mtime);
   3393
   3394    fidp = get_fid(pdu, fid);
   3395    if (fidp == NULL) {
   3396        err = -EINVAL;
   3397        goto out_nofid;
   3398    }
   3399    /* do we need to sync the file? */
   3400    if (donttouch_stat(&v9stat)) {
   3401        err = v9fs_co_fsync(pdu, fidp, 0);
   3402        goto out;
   3403    }
   3404    if (v9stat.mode != -1) {
   3405        uint32_t v9_mode;
   3406        err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
   3407        if (err < 0) {
   3408            goto out;
   3409        }
   3410        v9_mode = stat_to_v9mode(&stbuf);
   3411        if ((v9stat.mode & P9_STAT_MODE_TYPE_BITS) !=
   3412            (v9_mode & P9_STAT_MODE_TYPE_BITS)) {
   3413            /* Attempting to change the type */
   3414            err = -EIO;
   3415            goto out;
   3416        }
   3417        err = v9fs_co_chmod(pdu, &fidp->path,
   3418                            v9mode_to_mode(v9stat.mode,
   3419                                           &v9stat.extension));
   3420        if (err < 0) {
   3421            goto out;
   3422        }
   3423    }
   3424    if (v9stat.mtime != -1 || v9stat.atime != -1) {
   3425        struct timespec times[2];
   3426        if (v9stat.atime != -1) {
   3427            times[0].tv_sec = v9stat.atime;
   3428            times[0].tv_nsec = 0;
   3429        } else {
   3430            times[0].tv_nsec = UTIME_OMIT;
   3431        }
   3432        if (v9stat.mtime != -1) {
   3433            times[1].tv_sec = v9stat.mtime;
   3434            times[1].tv_nsec = 0;
   3435        } else {
   3436            times[1].tv_nsec = UTIME_OMIT;
   3437        }
   3438        err = v9fs_co_utimensat(pdu, &fidp->path, times);
   3439        if (err < 0) {
   3440            goto out;
   3441        }
   3442    }
   3443    if (v9stat.n_gid != -1 || v9stat.n_uid != -1) {
   3444        err = v9fs_co_chown(pdu, &fidp->path, v9stat.n_uid, v9stat.n_gid);
   3445        if (err < 0) {
   3446            goto out;
   3447        }
   3448    }
   3449    if (v9stat.name.size != 0) {
   3450        v9fs_path_write_lock(s);
   3451        err = v9fs_complete_rename(pdu, fidp, -1, &v9stat.name);
   3452        v9fs_path_unlock(s);
   3453        if (err < 0) {
   3454            goto out;
   3455        }
   3456    }
   3457    if (v9stat.length != -1) {
   3458        err = v9fs_co_truncate(pdu, &fidp->path, v9stat.length);
   3459        if (err < 0) {
   3460            goto out;
   3461        }
   3462    }
   3463    err = offset;
   3464out:
   3465    put_fid(pdu, fidp);
   3466out_nofid:
   3467    v9fs_stat_free(&v9stat);
   3468    pdu_complete(pdu, err);
   3469}
   3470
   3471static int v9fs_fill_statfs(V9fsState *s, V9fsPDU *pdu, struct statfs *stbuf)
   3472{
   3473    uint32_t f_type;
   3474    uint32_t f_bsize;
   3475    uint64_t f_blocks;
   3476    uint64_t f_bfree;
   3477    uint64_t f_bavail;
   3478    uint64_t f_files;
   3479    uint64_t f_ffree;
   3480    uint64_t fsid_val;
   3481    uint32_t f_namelen;
   3482    size_t offset = 7;
   3483    int32_t bsize_factor;
   3484
   3485    /*
   3486     * compute bsize factor based on host file system block size
   3487     * and client msize
   3488     */
   3489    bsize_factor = (s->msize - P9_IOHDRSZ) / stbuf->f_bsize;
   3490    if (!bsize_factor) {
   3491        bsize_factor = 1;
   3492    }
   3493    f_type  = stbuf->f_type;
   3494    f_bsize = stbuf->f_bsize;
   3495    f_bsize *= bsize_factor;
   3496    /*
   3497     * f_bsize is adjusted(multiplied) by bsize factor, so we need to
   3498     * adjust(divide) the number of blocks, free blocks and available
   3499     * blocks by bsize factor
   3500     */
   3501    f_blocks = stbuf->f_blocks / bsize_factor;
   3502    f_bfree  = stbuf->f_bfree / bsize_factor;
   3503    f_bavail = stbuf->f_bavail / bsize_factor;
   3504    f_files  = stbuf->f_files;
   3505    f_ffree  = stbuf->f_ffree;
   3506    fsid_val = (unsigned int) stbuf->f_fsid.__val[0] |
   3507               (unsigned long long)stbuf->f_fsid.__val[1] << 32;
   3508    f_namelen = stbuf->f_namelen;
   3509
   3510    return pdu_marshal(pdu, offset, "ddqqqqqqd",
   3511                       f_type, f_bsize, f_blocks, f_bfree,
   3512                       f_bavail, f_files, f_ffree,
   3513                       fsid_val, f_namelen);
   3514}
   3515
   3516static void coroutine_fn v9fs_statfs(void *opaque)
   3517{
   3518    int32_t fid;
   3519    ssize_t retval = 0;
   3520    size_t offset = 7;
   3521    V9fsFidState *fidp;
   3522    struct statfs stbuf;
   3523    V9fsPDU *pdu = opaque;
   3524    V9fsState *s = pdu->s;
   3525
   3526    retval = pdu_unmarshal(pdu, offset, "d", &fid);
   3527    if (retval < 0) {
   3528        goto out_nofid;
   3529    }
   3530    fidp = get_fid(pdu, fid);
   3531    if (fidp == NULL) {
   3532        retval = -ENOENT;
   3533        goto out_nofid;
   3534    }
   3535    retval = v9fs_co_statfs(pdu, &fidp->path, &stbuf);
   3536    if (retval < 0) {
   3537        goto out;
   3538    }
   3539    retval = v9fs_fill_statfs(s, pdu, &stbuf);
   3540    if (retval < 0) {
   3541        goto out;
   3542    }
   3543    retval += offset;
   3544out:
   3545    put_fid(pdu, fidp);
   3546out_nofid:
   3547    pdu_complete(pdu, retval);
   3548}
   3549
   3550static void coroutine_fn v9fs_mknod(void *opaque)
   3551{
   3552
   3553    int mode;
   3554    gid_t gid;
   3555    int32_t fid;
   3556    V9fsQID qid;
   3557    int err = 0;
   3558    int major, minor;
   3559    size_t offset = 7;
   3560    V9fsString name;
   3561    struct stat stbuf;
   3562    V9fsFidState *fidp;
   3563    V9fsPDU *pdu = opaque;
   3564
   3565    v9fs_string_init(&name);
   3566    err = pdu_unmarshal(pdu, offset, "dsdddd", &fid, &name, &mode,
   3567                        &major, &minor, &gid);
   3568    if (err < 0) {
   3569        goto out_nofid;
   3570    }
   3571    trace_v9fs_mknod(pdu->tag, pdu->id, fid, mode, major, minor);
   3572
   3573    if (name_is_illegal(name.data)) {
   3574        err = -ENOENT;
   3575        goto out_nofid;
   3576    }
   3577
   3578    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
   3579        err = -EEXIST;
   3580        goto out_nofid;
   3581    }
   3582
   3583    fidp = get_fid(pdu, fid);
   3584    if (fidp == NULL) {
   3585        err = -ENOENT;
   3586        goto out_nofid;
   3587    }
   3588    err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, gid,
   3589                        makedev(major, minor), mode, &stbuf);
   3590    if (err < 0) {
   3591        goto out;
   3592    }
   3593    err = stat_to_qid(pdu, &stbuf, &qid);
   3594    if (err < 0) {
   3595        goto out;
   3596    }
   3597    err = pdu_marshal(pdu, offset, "Q", &qid);
   3598    if (err < 0) {
   3599        goto out;
   3600    }
   3601    err += offset;
   3602    trace_v9fs_mknod_return(pdu->tag, pdu->id,
   3603                            qid.type, qid.version, qid.path);
   3604out:
   3605    put_fid(pdu, fidp);
   3606out_nofid:
   3607    pdu_complete(pdu, err);
   3608    v9fs_string_free(&name);
   3609}
   3610
   3611/*
   3612 * Implement posix byte range locking code
   3613 * Server side handling of locking code is very simple, because 9p server in
   3614 * QEMU can handle only one client. And most of the lock handling
   3615 * (like conflict, merging) etc is done by the VFS layer itself, so no need to
   3616 * do any thing in * qemu 9p server side lock code path.
   3617 * So when a TLOCK request comes, always return success
   3618 */
   3619static void coroutine_fn v9fs_lock(void *opaque)
   3620{
   3621    V9fsFlock flock;
   3622    size_t offset = 7;
   3623    struct stat stbuf;
   3624    V9fsFidState *fidp;
   3625    int32_t fid, err = 0;
   3626    V9fsPDU *pdu = opaque;
   3627
   3628    v9fs_string_init(&flock.client_id);
   3629    err = pdu_unmarshal(pdu, offset, "dbdqqds", &fid, &flock.type,
   3630                        &flock.flags, &flock.start, &flock.length,
   3631                        &flock.proc_id, &flock.client_id);
   3632    if (err < 0) {
   3633        goto out_nofid;
   3634    }
   3635    trace_v9fs_lock(pdu->tag, pdu->id, fid,
   3636                    flock.type, flock.start, flock.length);
   3637
   3638
   3639    /* We support only block flag now (that too ignored currently) */
   3640    if (flock.flags & ~P9_LOCK_FLAGS_BLOCK) {
   3641        err = -EINVAL;
   3642        goto out_nofid;
   3643    }
   3644    fidp = get_fid(pdu, fid);
   3645    if (fidp == NULL) {
   3646        err = -ENOENT;
   3647        goto out_nofid;
   3648    }
   3649    err = v9fs_co_fstat(pdu, fidp, &stbuf);
   3650    if (err < 0) {
   3651        goto out;
   3652    }
   3653    err = pdu_marshal(pdu, offset, "b", P9_LOCK_SUCCESS);
   3654    if (err < 0) {
   3655        goto out;
   3656    }
   3657    err += offset;
   3658    trace_v9fs_lock_return(pdu->tag, pdu->id, P9_LOCK_SUCCESS);
   3659out:
   3660    put_fid(pdu, fidp);
   3661out_nofid:
   3662    pdu_complete(pdu, err);
   3663    v9fs_string_free(&flock.client_id);
   3664}
   3665
   3666/*
   3667 * When a TGETLOCK request comes, always return success because all lock
   3668 * handling is done by client's VFS layer.
   3669 */
   3670static void coroutine_fn v9fs_getlock(void *opaque)
   3671{
   3672    size_t offset = 7;
   3673    struct stat stbuf;
   3674    V9fsFidState *fidp;
   3675    V9fsGetlock glock;
   3676    int32_t fid, err = 0;
   3677    V9fsPDU *pdu = opaque;
   3678
   3679    v9fs_string_init(&glock.client_id);
   3680    err = pdu_unmarshal(pdu, offset, "dbqqds", &fid, &glock.type,
   3681                        &glock.start, &glock.length, &glock.proc_id,
   3682                        &glock.client_id);
   3683    if (err < 0) {
   3684        goto out_nofid;
   3685    }
   3686    trace_v9fs_getlock(pdu->tag, pdu->id, fid,
   3687                       glock.type, glock.start, glock.length);
   3688
   3689    fidp = get_fid(pdu, fid);
   3690    if (fidp == NULL) {
   3691        err = -ENOENT;
   3692        goto out_nofid;
   3693    }
   3694    err = v9fs_co_fstat(pdu, fidp, &stbuf);
   3695    if (err < 0) {
   3696        goto out;
   3697    }
   3698    glock.type = P9_LOCK_TYPE_UNLCK;
   3699    err = pdu_marshal(pdu, offset, "bqqds", glock.type,
   3700                          glock.start, glock.length, glock.proc_id,
   3701                          &glock.client_id);
   3702    if (err < 0) {
   3703        goto out;
   3704    }
   3705    err += offset;
   3706    trace_v9fs_getlock_return(pdu->tag, pdu->id, glock.type, glock.start,
   3707                              glock.length, glock.proc_id);
   3708out:
   3709    put_fid(pdu, fidp);
   3710out_nofid:
   3711    pdu_complete(pdu, err);
   3712    v9fs_string_free(&glock.client_id);
   3713}
   3714
   3715static void coroutine_fn v9fs_mkdir(void *opaque)
   3716{
   3717    V9fsPDU *pdu = opaque;
   3718    size_t offset = 7;
   3719    int32_t fid;
   3720    struct stat stbuf;
   3721    V9fsQID qid;
   3722    V9fsString name;
   3723    V9fsFidState *fidp;
   3724    gid_t gid;
   3725    int mode;
   3726    int err = 0;
   3727
   3728    v9fs_string_init(&name);
   3729    err = pdu_unmarshal(pdu, offset, "dsdd", &fid, &name, &mode, &gid);
   3730    if (err < 0) {
   3731        goto out_nofid;
   3732    }
   3733    trace_v9fs_mkdir(pdu->tag, pdu->id, fid, name.data, mode, gid);
   3734
   3735    if (name_is_illegal(name.data)) {
   3736        err = -ENOENT;
   3737        goto out_nofid;
   3738    }
   3739
   3740    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
   3741        err = -EEXIST;
   3742        goto out_nofid;
   3743    }
   3744
   3745    fidp = get_fid(pdu, fid);
   3746    if (fidp == NULL) {
   3747        err = -ENOENT;
   3748        goto out_nofid;
   3749    }
   3750    err = v9fs_co_mkdir(pdu, fidp, &name, mode, fidp->uid, gid, &stbuf);
   3751    if (err < 0) {
   3752        goto out;
   3753    }
   3754    err = stat_to_qid(pdu, &stbuf, &qid);
   3755    if (err < 0) {
   3756        goto out;
   3757    }
   3758    err = pdu_marshal(pdu, offset, "Q", &qid);
   3759    if (err < 0) {
   3760        goto out;
   3761    }
   3762    err += offset;
   3763    trace_v9fs_mkdir_return(pdu->tag, pdu->id,
   3764                            qid.type, qid.version, qid.path, err);
   3765out:
   3766    put_fid(pdu, fidp);
   3767out_nofid:
   3768    pdu_complete(pdu, err);
   3769    v9fs_string_free(&name);
   3770}
   3771
   3772static void coroutine_fn v9fs_xattrwalk(void *opaque)
   3773{
   3774    int64_t size;
   3775    V9fsString name;
   3776    ssize_t err = 0;
   3777    size_t offset = 7;
   3778    int32_t fid, newfid;
   3779    V9fsFidState *file_fidp;
   3780    V9fsFidState *xattr_fidp = NULL;
   3781    V9fsPDU *pdu = opaque;
   3782    V9fsState *s = pdu->s;
   3783
   3784    v9fs_string_init(&name);
   3785    err = pdu_unmarshal(pdu, offset, "dds", &fid, &newfid, &name);
   3786    if (err < 0) {
   3787        goto out_nofid;
   3788    }
   3789    trace_v9fs_xattrwalk(pdu->tag, pdu->id, fid, newfid, name.data);
   3790
   3791    file_fidp = get_fid(pdu, fid);
   3792    if (file_fidp == NULL) {
   3793        err = -ENOENT;
   3794        goto out_nofid;
   3795    }
   3796    xattr_fidp = alloc_fid(s, newfid);
   3797    if (xattr_fidp == NULL) {
   3798        err = -EINVAL;
   3799        goto out;
   3800    }
   3801    v9fs_path_copy(&xattr_fidp->path, &file_fidp->path);
   3802    if (!v9fs_string_size(&name)) {
   3803        /*
   3804         * listxattr request. Get the size first
   3805         */
   3806        size = v9fs_co_llistxattr(pdu, &xattr_fidp->path, NULL, 0);
   3807        if (size < 0) {
   3808            err = size;
   3809            clunk_fid(s, xattr_fidp->fid);
   3810            goto out;
   3811        }
   3812        /*
   3813         * Read the xattr value
   3814         */
   3815        xattr_fidp->fs.xattr.len = size;
   3816        xattr_fidp->fid_type = P9_FID_XATTR;
   3817        xattr_fidp->fs.xattr.xattrwalk_fid = true;
   3818        xattr_fidp->fs.xattr.value = g_malloc0(size);
   3819        if (size) {
   3820            err = v9fs_co_llistxattr(pdu, &xattr_fidp->path,
   3821                                     xattr_fidp->fs.xattr.value,
   3822                                     xattr_fidp->fs.xattr.len);
   3823            if (err < 0) {
   3824                clunk_fid(s, xattr_fidp->fid);
   3825                goto out;
   3826            }
   3827        }
   3828        err = pdu_marshal(pdu, offset, "q", size);
   3829        if (err < 0) {
   3830            goto out;
   3831        }
   3832        err += offset;
   3833    } else {
   3834        /*
   3835         * specific xattr fid. We check for xattr
   3836         * presence also collect the xattr size
   3837         */
   3838        size = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
   3839                                 &name, NULL, 0);
   3840        if (size < 0) {
   3841            err = size;
   3842            clunk_fid(s, xattr_fidp->fid);
   3843            goto out;
   3844        }
   3845        /*
   3846         * Read the xattr value
   3847         */
   3848        xattr_fidp->fs.xattr.len = size;
   3849        xattr_fidp->fid_type = P9_FID_XATTR;
   3850        xattr_fidp->fs.xattr.xattrwalk_fid = true;
   3851        xattr_fidp->fs.xattr.value = g_malloc0(size);
   3852        if (size) {
   3853            err = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
   3854                                    &name, xattr_fidp->fs.xattr.value,
   3855                                    xattr_fidp->fs.xattr.len);
   3856            if (err < 0) {
   3857                clunk_fid(s, xattr_fidp->fid);
   3858                goto out;
   3859            }
   3860        }
   3861        err = pdu_marshal(pdu, offset, "q", size);
   3862        if (err < 0) {
   3863            goto out;
   3864        }
   3865        err += offset;
   3866    }
   3867    trace_v9fs_xattrwalk_return(pdu->tag, pdu->id, size);
   3868out:
   3869    put_fid(pdu, file_fidp);
   3870    if (xattr_fidp) {
   3871        put_fid(pdu, xattr_fidp);
   3872    }
   3873out_nofid:
   3874    pdu_complete(pdu, err);
   3875    v9fs_string_free(&name);
   3876}
   3877
   3878static void coroutine_fn v9fs_xattrcreate(void *opaque)
   3879{
   3880    int flags, rflags = 0;
   3881    int32_t fid;
   3882    uint64_t size;
   3883    ssize_t err = 0;
   3884    V9fsString name;
   3885    size_t offset = 7;
   3886    V9fsFidState *file_fidp;
   3887    V9fsFidState *xattr_fidp;
   3888    V9fsPDU *pdu = opaque;
   3889
   3890    v9fs_string_init(&name);
   3891    err = pdu_unmarshal(pdu, offset, "dsqd", &fid, &name, &size, &flags);
   3892    if (err < 0) {
   3893        goto out_nofid;
   3894    }
   3895    trace_v9fs_xattrcreate(pdu->tag, pdu->id, fid, name.data, size, flags);
   3896
   3897    if (flags & ~(P9_XATTR_CREATE | P9_XATTR_REPLACE)) {
   3898        err = -EINVAL;
   3899        goto out_nofid;
   3900    }
   3901
   3902    if (flags & P9_XATTR_CREATE) {
   3903        rflags |= XATTR_CREATE;
   3904    }
   3905
   3906    if (flags & P9_XATTR_REPLACE) {
   3907        rflags |= XATTR_REPLACE;
   3908    }
   3909
   3910    if (size > XATTR_SIZE_MAX) {
   3911        err = -E2BIG;
   3912        goto out_nofid;
   3913    }
   3914
   3915    file_fidp = get_fid(pdu, fid);
   3916    if (file_fidp == NULL) {
   3917        err = -EINVAL;
   3918        goto out_nofid;
   3919    }
   3920    if (file_fidp->fid_type != P9_FID_NONE) {
   3921        err = -EINVAL;
   3922        goto out_put_fid;
   3923    }
   3924
   3925    /* Make the file fid point to xattr */
   3926    xattr_fidp = file_fidp;
   3927    xattr_fidp->fid_type = P9_FID_XATTR;
   3928    xattr_fidp->fs.xattr.copied_len = 0;
   3929    xattr_fidp->fs.xattr.xattrwalk_fid = false;
   3930    xattr_fidp->fs.xattr.len = size;
   3931    xattr_fidp->fs.xattr.flags = rflags;
   3932    v9fs_string_init(&xattr_fidp->fs.xattr.name);
   3933    v9fs_string_copy(&xattr_fidp->fs.xattr.name, &name);
   3934    xattr_fidp->fs.xattr.value = g_malloc0(size);
   3935    err = offset;
   3936out_put_fid:
   3937    put_fid(pdu, file_fidp);
   3938out_nofid:
   3939    pdu_complete(pdu, err);
   3940    v9fs_string_free(&name);
   3941}
   3942
   3943static void coroutine_fn v9fs_readlink(void *opaque)
   3944{
   3945    V9fsPDU *pdu = opaque;
   3946    size_t offset = 7;
   3947    V9fsString target;
   3948    int32_t fid;
   3949    int err = 0;
   3950    V9fsFidState *fidp;
   3951
   3952    err = pdu_unmarshal(pdu, offset, "d", &fid);
   3953    if (err < 0) {
   3954        goto out_nofid;
   3955    }
   3956    trace_v9fs_readlink(pdu->tag, pdu->id, fid);
   3957    fidp = get_fid(pdu, fid);
   3958    if (fidp == NULL) {
   3959        err = -ENOENT;
   3960        goto out_nofid;
   3961    }
   3962
   3963    v9fs_string_init(&target);
   3964    err = v9fs_co_readlink(pdu, &fidp->path, &target);
   3965    if (err < 0) {
   3966        goto out;
   3967    }
   3968    err = pdu_marshal(pdu, offset, "s", &target);
   3969    if (err < 0) {
   3970        v9fs_string_free(&target);
   3971        goto out;
   3972    }
   3973    err += offset;
   3974    trace_v9fs_readlink_return(pdu->tag, pdu->id, target.data);
   3975    v9fs_string_free(&target);
   3976out:
   3977    put_fid(pdu, fidp);
   3978out_nofid:
   3979    pdu_complete(pdu, err);
   3980}
   3981
   3982static CoroutineEntry *pdu_co_handlers[] = {
   3983    [P9_TREADDIR] = v9fs_readdir,
   3984    [P9_TSTATFS] = v9fs_statfs,
   3985    [P9_TGETATTR] = v9fs_getattr,
   3986    [P9_TSETATTR] = v9fs_setattr,
   3987    [P9_TXATTRWALK] = v9fs_xattrwalk,
   3988    [P9_TXATTRCREATE] = v9fs_xattrcreate,
   3989    [P9_TMKNOD] = v9fs_mknod,
   3990    [P9_TRENAME] = v9fs_rename,
   3991    [P9_TLOCK] = v9fs_lock,
   3992    [P9_TGETLOCK] = v9fs_getlock,
   3993    [P9_TRENAMEAT] = v9fs_renameat,
   3994    [P9_TREADLINK] = v9fs_readlink,
   3995    [P9_TUNLINKAT] = v9fs_unlinkat,
   3996    [P9_TMKDIR] = v9fs_mkdir,
   3997    [P9_TVERSION] = v9fs_version,
   3998    [P9_TLOPEN] = v9fs_open,
   3999    [P9_TATTACH] = v9fs_attach,
   4000    [P9_TSTAT] = v9fs_stat,
   4001    [P9_TWALK] = v9fs_walk,
   4002    [P9_TCLUNK] = v9fs_clunk,
   4003    [P9_TFSYNC] = v9fs_fsync,
   4004    [P9_TOPEN] = v9fs_open,
   4005    [P9_TREAD] = v9fs_read,
   4006#if 0
   4007    [P9_TAUTH] = v9fs_auth,
   4008#endif
   4009    [P9_TFLUSH] = v9fs_flush,
   4010    [P9_TLINK] = v9fs_link,
   4011    [P9_TSYMLINK] = v9fs_symlink,
   4012    [P9_TCREATE] = v9fs_create,
   4013    [P9_TLCREATE] = v9fs_lcreate,
   4014    [P9_TWRITE] = v9fs_write,
   4015    [P9_TWSTAT] = v9fs_wstat,
   4016    [P9_TREMOVE] = v9fs_remove,
   4017};
   4018
   4019static void coroutine_fn v9fs_op_not_supp(void *opaque)
   4020{
   4021    V9fsPDU *pdu = opaque;
   4022    pdu_complete(pdu, -EOPNOTSUPP);
   4023}
   4024
   4025static void coroutine_fn v9fs_fs_ro(void *opaque)
   4026{
   4027    V9fsPDU *pdu = opaque;
   4028    pdu_complete(pdu, -EROFS);
   4029}
   4030
   4031static inline bool is_read_only_op(V9fsPDU *pdu)
   4032{
   4033    switch (pdu->id) {
   4034    case P9_TREADDIR:
   4035    case P9_TSTATFS:
   4036    case P9_TGETATTR:
   4037    case P9_TXATTRWALK:
   4038    case P9_TLOCK:
   4039    case P9_TGETLOCK:
   4040    case P9_TREADLINK:
   4041    case P9_TVERSION:
   4042    case P9_TLOPEN:
   4043    case P9_TATTACH:
   4044    case P9_TSTAT:
   4045    case P9_TWALK:
   4046    case P9_TCLUNK:
   4047    case P9_TFSYNC:
   4048    case P9_TOPEN:
   4049    case P9_TREAD:
   4050    case P9_TAUTH:
   4051    case P9_TFLUSH:
   4052        return 1;
   4053    default:
   4054        return 0;
   4055    }
   4056}
   4057
   4058void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr)
   4059{
   4060    Coroutine *co;
   4061    CoroutineEntry *handler;
   4062    V9fsState *s = pdu->s;
   4063
   4064    pdu->size = le32_to_cpu(hdr->size_le);
   4065    pdu->id = hdr->id;
   4066    pdu->tag = le16_to_cpu(hdr->tag_le);
   4067
   4068    if (pdu->id >= ARRAY_SIZE(pdu_co_handlers) ||
   4069        (pdu_co_handlers[pdu->id] == NULL)) {
   4070        handler = v9fs_op_not_supp;
   4071    } else if (is_ro_export(&s->ctx) && !is_read_only_op(pdu)) {
   4072        handler = v9fs_fs_ro;
   4073    } else {
   4074        handler = pdu_co_handlers[pdu->id];
   4075    }
   4076
   4077    qemu_co_queue_init(&pdu->complete);
   4078    co = qemu_coroutine_create(handler, pdu);
   4079    qemu_coroutine_enter(co);
   4080}
   4081
   4082/* Returns 0 on success, 1 on failure. */
   4083int v9fs_device_realize_common(V9fsState *s, const V9fsTransport *t,
   4084                               Error **errp)
   4085{
   4086    ERRP_GUARD();
   4087    int i, len;
   4088    struct stat stat;
   4089    FsDriverEntry *fse;
   4090    V9fsPath path;
   4091    int rc = 1;
   4092
   4093    assert(!s->transport);
   4094    s->transport = t;
   4095
   4096    /* initialize pdu allocator */
   4097    QLIST_INIT(&s->free_list);
   4098    QLIST_INIT(&s->active_list);
   4099    for (i = 0; i < MAX_REQ; i++) {
   4100        QLIST_INSERT_HEAD(&s->free_list, &s->pdus[i], next);
   4101        s->pdus[i].s = s;
   4102        s->pdus[i].idx = i;
   4103    }
   4104
   4105    v9fs_path_init(&path);
   4106
   4107    fse = get_fsdev_fsentry(s->fsconf.fsdev_id);
   4108
   4109    if (!fse) {
   4110        /* We don't have a fsdev identified by fsdev_id */
   4111        error_setg(errp, "9pfs device couldn't find fsdev with the "
   4112                   "id = %s",
   4113                   s->fsconf.fsdev_id ? s->fsconf.fsdev_id : "NULL");
   4114        goto out;
   4115    }
   4116
   4117    if (!s->fsconf.tag) {
   4118        /* we haven't specified a mount_tag */
   4119        error_setg(errp, "fsdev with id %s needs mount_tag arguments",
   4120                   s->fsconf.fsdev_id);
   4121        goto out;
   4122    }
   4123
   4124    s->ctx.export_flags = fse->export_flags;
   4125    s->ctx.fs_root = g_strdup(fse->path);
   4126    s->ctx.exops.get_st_gen = NULL;
   4127    len = strlen(s->fsconf.tag);
   4128    if (len > MAX_TAG_LEN - 1) {
   4129        error_setg(errp, "mount tag '%s' (%d bytes) is longer than "
   4130                   "maximum (%d bytes)", s->fsconf.tag, len, MAX_TAG_LEN - 1);
   4131        goto out;
   4132    }
   4133
   4134    s->tag = g_strdup(s->fsconf.tag);
   4135    s->ctx.uid = -1;
   4136
   4137    s->ops = fse->ops;
   4138
   4139    s->ctx.fmode = fse->fmode;
   4140    s->ctx.dmode = fse->dmode;
   4141
   4142    QSIMPLEQ_INIT(&s->fid_list);
   4143    qemu_co_rwlock_init(&s->rename_lock);
   4144
   4145    if (s->ops->init(&s->ctx, errp) < 0) {
   4146        error_prepend(errp, "cannot initialize fsdev '%s': ",
   4147                      s->fsconf.fsdev_id);
   4148        goto out;
   4149    }
   4150
   4151    /*
   4152     * Check details of export path, We need to use fs driver
   4153     * call back to do that. Since we are in the init path, we don't
   4154     * use co-routines here.
   4155     */
   4156    if (s->ops->name_to_path(&s->ctx, NULL, "/", &path) < 0) {
   4157        error_setg(errp,
   4158                   "error in converting name to path %s", strerror(errno));
   4159        goto out;
   4160    }
   4161    if (s->ops->lstat(&s->ctx, &path, &stat)) {
   4162        error_setg(errp, "share path %s does not exist", fse->path);
   4163        goto out;
   4164    } else if (!S_ISDIR(stat.st_mode)) {
   4165        error_setg(errp, "share path %s is not a directory", fse->path);
   4166        goto out;
   4167    }
   4168
   4169    s->dev_id = stat.st_dev;
   4170
   4171    /* init inode remapping : */
   4172    /* hash table for variable length inode suffixes */
   4173    qpd_table_init(&s->qpd_table);
   4174    /* hash table for slow/full inode remapping (most users won't need it) */
   4175    qpf_table_init(&s->qpf_table);
   4176    /* hash table for quick inode remapping */
   4177    qpp_table_init(&s->qpp_table);
   4178    s->qp_ndevices = 0;
   4179    s->qp_affix_next = 1; /* reserve 0 to detect overflow */
   4180    s->qp_fullpath_next = 1;
   4181
   4182    s->ctx.fst = &fse->fst;
   4183    fsdev_throttle_init(s->ctx.fst);
   4184
   4185    rc = 0;
   4186out:
   4187    if (rc) {
   4188        v9fs_device_unrealize_common(s);
   4189    }
   4190    v9fs_path_free(&path);
   4191    return rc;
   4192}
   4193
   4194void v9fs_device_unrealize_common(V9fsState *s)
   4195{
   4196    if (s->ops && s->ops->cleanup) {
   4197        s->ops->cleanup(&s->ctx);
   4198    }
   4199    if (s->ctx.fst) {
   4200        fsdev_throttle_cleanup(s->ctx.fst);
   4201    }
   4202    g_free(s->tag);
   4203    qp_table_destroy(&s->qpd_table);
   4204    qp_table_destroy(&s->qpp_table);
   4205    qp_table_destroy(&s->qpf_table);
   4206    g_free(s->ctx.fs_root);
   4207}
   4208
   4209typedef struct VirtfsCoResetData {
   4210    V9fsPDU pdu;
   4211    bool done;
   4212} VirtfsCoResetData;
   4213
   4214static void coroutine_fn virtfs_co_reset(void *opaque)
   4215{
   4216    VirtfsCoResetData *data = opaque;
   4217
   4218    virtfs_reset(&data->pdu);
   4219    data->done = true;
   4220}
   4221
   4222void v9fs_reset(V9fsState *s)
   4223{
   4224    VirtfsCoResetData data = { .pdu = { .s = s }, .done = false };
   4225    Coroutine *co;
   4226
   4227    while (!QLIST_EMPTY(&s->active_list)) {
   4228        aio_poll(qemu_get_aio_context(), true);
   4229    }
   4230
   4231    co = qemu_coroutine_create(virtfs_co_reset, &data);
   4232    qemu_coroutine_enter(co);
   4233
   4234    while (!data.done) {
   4235        aio_poll(qemu_get_aio_context(), true);
   4236    }
   4237}
   4238
   4239static void __attribute__((__constructor__)) v9fs_set_fd_limit(void)
   4240{
   4241    struct rlimit rlim;
   4242    if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) {
   4243        error_report("Failed to get the resource limit");
   4244        exit(1);
   4245    }
   4246    open_fd_hw = rlim.rlim_cur - MIN(400, rlim.rlim_cur / 3);
   4247    open_fd_rc = rlim.rlim_cur / 2;
   4248}