cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu
Log | Files | Refs | Submodules | LICENSE | sfeed.txt

client.c (48932B)


      1/*
      2 *  Copyright (C) 2016-2019 Red Hat, Inc.
      3 *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
      4 *
      5 *  Network Block Device Client Side
      6 *
      7 *  This program is free software; you can redistribute it and/or modify
      8 *  it under the terms of the GNU General Public License as published by
      9 *  the Free Software Foundation; under version 2 of the License.
     10 *
     11 *  This program is distributed in the hope that it will be useful,
     12 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14 *  GNU General Public License for more details.
     15 *
     16 *  You should have received a copy of the GNU General Public License
     17 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
     18 */
     19
     20#include "qemu/osdep.h"
     21#include "qapi/error.h"
     22#include "qemu/queue.h"
     23#include "trace.h"
     24#include "nbd-internal.h"
     25#include "qemu/cutils.h"
     26
     27/* Definitions for opaque data types */
     28
/*
 * File-scope tail queue head for NBDExport entries, statically
 * initialized to the empty queue.
 * NOTE(review): nothing in the visible part of this file references
 * 'exports' - confirm whether it is still needed here.
 */
static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
     30
     31/* That's all folks */
     32
     33/* Basic flow for negotiation
     34
     35   Server         Client
     36   Negotiate
     37
     38   or
     39
     40   Server         Client
     41   Negotiate #1
     42                  Option
     43   Negotiate #2
     44
     45   ----
     46
     47   followed by
     48
     49   Server         Client
     50                  Request
     51   Response
     52                  Request
     53   Response
     54                  ...
     55   ...
     56                  Request (type == 2)
     57
     58*/
     59
     60/* Send an option request.
     61 *
     62 * The request is for option @opt, with @data containing @len bytes of
     63 * additional payload for the request (@len may be -1 to treat @data as
     64 * a C string; and @data may be NULL if @len is 0).
     65 * Return 0 if successful, -1 with errp set if it is impossible to
     66 * continue. */
     67static int nbd_send_option_request(QIOChannel *ioc, uint32_t opt,
     68                                   uint32_t len, const char *data,
     69                                   Error **errp)
     70{
     71    ERRP_GUARD();
     72    NBDOption req;
     73    QEMU_BUILD_BUG_ON(sizeof(req) != 16);
     74
     75    if (len == -1) {
     76        req.length = len = strlen(data);
     77    }
     78    trace_nbd_send_option_request(opt, nbd_opt_lookup(opt), len);
     79
     80    stq_be_p(&req.magic, NBD_OPTS_MAGIC);
     81    stl_be_p(&req.option, opt);
     82    stl_be_p(&req.length, len);
     83
     84    if (nbd_write(ioc, &req, sizeof(req), errp) < 0) {
     85        error_prepend(errp, "Failed to send option request header: ");
     86        return -1;
     87    }
     88
     89    if (len && nbd_write(ioc, (char *) data, len, errp) < 0) {
     90        error_prepend(errp, "Failed to send option request data: ");
     91        return -1;
     92    }
     93
     94    return 0;
     95}
     96
     97/* Send NBD_OPT_ABORT as a courtesy to let the server know that we are
     98 * not going to attempt further negotiation. */
     99static void nbd_send_opt_abort(QIOChannel *ioc)
    100{
    101    /* Technically, a compliant server is supposed to reply to us; but
    102     * older servers disconnected instead. At any rate, we're allowed
    103     * to disconnect without waiting for the server reply, so we don't
    104     * even care if the request makes it to the server, let alone
    105     * waiting around for whether the server replies. */
    106    nbd_send_option_request(ioc, NBD_OPT_ABORT, 0, NULL, NULL);
    107}
    108
    109
    110/* Receive the header of an option reply, which should match the given
    111 * opt.  Read through the length field, but NOT the length bytes of
    112 * payload. Return 0 if successful, -1 with errp set if it is
    113 * impossible to continue. */
    114static int nbd_receive_option_reply(QIOChannel *ioc, uint32_t opt,
    115                                    NBDOptionReply *reply, Error **errp)
    116{
    117    QEMU_BUILD_BUG_ON(sizeof(*reply) != 20);
    118    if (nbd_read(ioc, reply, sizeof(*reply), "option reply", errp) < 0) {
    119        nbd_send_opt_abort(ioc);
    120        return -1;
    121    }
    122    reply->magic = be64_to_cpu(reply->magic);
    123    reply->option = be32_to_cpu(reply->option);
    124    reply->type = be32_to_cpu(reply->type);
    125    reply->length = be32_to_cpu(reply->length);
    126
    127    trace_nbd_receive_option_reply(reply->option, nbd_opt_lookup(reply->option),
    128                                   reply->type, nbd_rep_lookup(reply->type),
    129                                   reply->length);
    130
    131    if (reply->magic != NBD_REP_MAGIC) {
    132        error_setg(errp, "Unexpected option reply magic");
    133        nbd_send_opt_abort(ioc);
    134        return -1;
    135    }
    136    if (reply->option != opt) {
    137        error_setg(errp, "Unexpected option type %u (%s), expected %u (%s)",
    138                   reply->option, nbd_opt_lookup(reply->option),
    139                   opt, nbd_opt_lookup(opt));
    140        nbd_send_opt_abort(ioc);
    141        return -1;
    142    }
    143    return 0;
    144}
    145
    146/*
    147 * If reply represents success, return 1 without further action.  If
    148 * reply represents an error, consume the optional payload of the
    149 * packet on ioc.  Then return 0 for unsupported (so the client can
    150 * fall back to other approaches), where @strict determines if only
    151 * ERR_UNSUP or all errors fit that category, or -1 with errp set for
    152 * other errors.
    153 */
    154static int nbd_handle_reply_err(QIOChannel *ioc, NBDOptionReply *reply,
    155                                bool strict, Error **errp)
    156{
    157    ERRP_GUARD();
    158    g_autofree char *msg = NULL;
    159
    160    if (!(reply->type & (1 << 31))) {
    161        return 1;
    162    }
    163
    164    if (reply->length) {
    165        if (reply->length > NBD_MAX_BUFFER_SIZE) {
    166            error_setg(errp, "server error %" PRIu32
    167                       " (%s) message is too long",
    168                       reply->type, nbd_rep_lookup(reply->type));
    169            goto err;
    170        }
    171        msg = g_malloc(reply->length + 1);
    172        if (nbd_read(ioc, msg, reply->length, NULL, errp) < 0) {
    173            error_prepend(errp, "Failed to read option error %" PRIu32
    174                          " (%s) message: ",
    175                          reply->type, nbd_rep_lookup(reply->type));
    176            goto err;
    177        }
    178        msg[reply->length] = '\0';
    179        trace_nbd_server_error_msg(reply->type,
    180                                   nbd_reply_type_lookup(reply->type), msg);
    181    }
    182
    183    if (reply->type == NBD_REP_ERR_UNSUP || !strict) {
    184        trace_nbd_reply_err_ignored(reply->option,
    185                                    nbd_opt_lookup(reply->option),
    186                                    reply->type, nbd_rep_lookup(reply->type));
    187        return 0;
    188    }
    189
    190    switch (reply->type) {
    191    case NBD_REP_ERR_POLICY:
    192        error_setg(errp, "Denied by server for option %" PRIu32 " (%s)",
    193                   reply->option, nbd_opt_lookup(reply->option));
    194        break;
    195
    196    case NBD_REP_ERR_INVALID:
    197        error_setg(errp, "Invalid parameters for option %" PRIu32 " (%s)",
    198                   reply->option, nbd_opt_lookup(reply->option));
    199        break;
    200
    201    case NBD_REP_ERR_PLATFORM:
    202        error_setg(errp, "Server lacks support for option %" PRIu32 " (%s)",
    203                   reply->option, nbd_opt_lookup(reply->option));
    204        break;
    205
    206    case NBD_REP_ERR_TLS_REQD:
    207        error_setg(errp, "TLS negotiation required before option %" PRIu32
    208                   " (%s)", reply->option, nbd_opt_lookup(reply->option));
    209        error_append_hint(errp, "Did you forget a valid tls-creds?\n");
    210        break;
    211
    212    case NBD_REP_ERR_UNKNOWN:
    213        error_setg(errp, "Requested export not available");
    214        break;
    215
    216    case NBD_REP_ERR_SHUTDOWN:
    217        error_setg(errp, "Server shutting down before option %" PRIu32 " (%s)",
    218                   reply->option, nbd_opt_lookup(reply->option));
    219        break;
    220
    221    case NBD_REP_ERR_BLOCK_SIZE_REQD:
    222        error_setg(errp, "Server requires INFO_BLOCK_SIZE for option %" PRIu32
    223                   " (%s)", reply->option, nbd_opt_lookup(reply->option));
    224        break;
    225
    226    default:
    227        error_setg(errp, "Unknown error code when asking for option %" PRIu32
    228                   " (%s)", reply->option, nbd_opt_lookup(reply->option));
    229        break;
    230    }
    231
    232    if (msg) {
    233        error_append_hint(errp, "server reported: %s\n", msg);
    234    }
    235
    236 err:
    237    nbd_send_opt_abort(ioc);
    238    return -1;
    239}
    240
    241/* nbd_receive_list:
    242 * Process another portion of the NBD_OPT_LIST reply, populating any
    243 * name received into *@name. If @description is non-NULL, and the
    244 * server provided a description, that is also populated. The caller
    245 * must eventually call g_free() on success.
    246 * Returns 1 if name and description were set and iteration must continue,
    247 *         0 if iteration is complete (including if OPT_LIST unsupported),
    248 *         -1 with @errp set if an unrecoverable error occurred.
    249 */
    250static int nbd_receive_list(QIOChannel *ioc, char **name, char **description,
    251                            Error **errp)
    252{
    253    NBDOptionReply reply;
    254    uint32_t len;
    255    uint32_t namelen;
    256    g_autofree char *local_name = NULL;
    257    g_autofree char *local_desc = NULL;
    258    int error;
    259
    260    if (nbd_receive_option_reply(ioc, NBD_OPT_LIST, &reply, errp) < 0) {
    261        return -1;
    262    }
    263    error = nbd_handle_reply_err(ioc, &reply, true, errp);
    264    if (error <= 0) {
    265        return error;
    266    }
    267    len = reply.length;
    268
    269    if (reply.type == NBD_REP_ACK) {
    270        if (len != 0) {
    271            error_setg(errp, "length too long for option end");
    272            nbd_send_opt_abort(ioc);
    273            return -1;
    274        }
    275        return 0;
    276    } else if (reply.type != NBD_REP_SERVER) {
    277        error_setg(errp, "Unexpected reply type %u (%s), expected %u (%s)",
    278                   reply.type, nbd_rep_lookup(reply.type),
    279                   NBD_REP_SERVER, nbd_rep_lookup(NBD_REP_SERVER));
    280        nbd_send_opt_abort(ioc);
    281        return -1;
    282    }
    283
    284    if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) {
    285        error_setg(errp, "incorrect option length %" PRIu32, len);
    286        nbd_send_opt_abort(ioc);
    287        return -1;
    288    }
    289    if (nbd_read32(ioc, &namelen, "option name length", errp) < 0) {
    290        nbd_send_opt_abort(ioc);
    291        return -1;
    292    }
    293    len -= sizeof(namelen);
    294    if (len < namelen || namelen > NBD_MAX_STRING_SIZE) {
    295        error_setg(errp, "incorrect name length in server's list response");
    296        nbd_send_opt_abort(ioc);
    297        return -1;
    298    }
    299
    300    local_name = g_malloc(namelen + 1);
    301    if (nbd_read(ioc, local_name, namelen, "export name", errp) < 0) {
    302        nbd_send_opt_abort(ioc);
    303        return -1;
    304    }
    305    local_name[namelen] = '\0';
    306    len -= namelen;
    307    if (len) {
    308        if (len > NBD_MAX_STRING_SIZE) {
    309            error_setg(errp, "incorrect description length in server's "
    310                       "list response");
    311            nbd_send_opt_abort(ioc);
    312            return -1;
    313        }
    314        local_desc = g_malloc(len + 1);
    315        if (nbd_read(ioc, local_desc, len, "export description", errp) < 0) {
    316            nbd_send_opt_abort(ioc);
    317            return -1;
    318        }
    319        local_desc[len] = '\0';
    320    }
    321
    322    trace_nbd_receive_list(local_name, local_desc ?: "");
    323    *name = g_steal_pointer(&local_name);
    324    if (description) {
    325        *description = g_steal_pointer(&local_desc);
    326    }
    327    return 1;
    328}
    329
    330
    331/*
    332 * nbd_opt_info_or_go:
    333 * Send option for NBD_OPT_INFO or NBD_OPT_GO and parse the reply.
    334 * Returns -1 if the option proves the export @info->name cannot be
    335 * used, 0 if the option is unsupported (fall back to NBD_OPT_LIST and
    336 * NBD_OPT_EXPORT_NAME in that case), and > 0 if the export is good to
    337 * go (with the rest of @info populated).
    338 */
    339static int nbd_opt_info_or_go(QIOChannel *ioc, uint32_t opt,
    340                              NBDExportInfo *info, Error **errp)
    341{
    342    ERRP_GUARD();
    343    NBDOptionReply reply;
    344    uint32_t len = strlen(info->name);
    345    uint16_t type;
    346    int error;
    347    char *buf;
    348
    349    /* The protocol requires that the server send NBD_INFO_EXPORT with
    350     * a non-zero flags (at least NBD_FLAG_HAS_FLAGS must be set); so
    351     * flags still 0 is a witness of a broken server. */
    352    info->flags = 0;
    353
    354    assert(opt == NBD_OPT_GO || opt == NBD_OPT_INFO);
    355    trace_nbd_opt_info_go_start(nbd_opt_lookup(opt), info->name);
    356    buf = g_malloc(4 + len + 2 + 2 * info->request_sizes + 1);
    357    stl_be_p(buf, len);
    358    memcpy(buf + 4, info->name, len);
    359    /* At most one request, everything else up to server */
    360    stw_be_p(buf + 4 + len, info->request_sizes);
    361    if (info->request_sizes) {
    362        stw_be_p(buf + 4 + len + 2, NBD_INFO_BLOCK_SIZE);
    363    }
    364    error = nbd_send_option_request(ioc, opt,
    365                                    4 + len + 2 + 2 * info->request_sizes,
    366                                    buf, errp);
    367    g_free(buf);
    368    if (error < 0) {
    369        return -1;
    370    }
    371
    372    while (1) {
    373        if (nbd_receive_option_reply(ioc, opt, &reply, errp) < 0) {
    374            return -1;
    375        }
    376        error = nbd_handle_reply_err(ioc, &reply, true, errp);
    377        if (error <= 0) {
    378            return error;
    379        }
    380        len = reply.length;
    381
    382        if (reply.type == NBD_REP_ACK) {
    383            /*
    384             * Server is done sending info, and moved into transmission
    385             * phase for NBD_OPT_GO, but make sure it sent flags
    386             */
    387            if (len) {
    388                error_setg(errp, "server sent invalid NBD_REP_ACK");
    389                return -1;
    390            }
    391            if (!info->flags) {
    392                error_setg(errp, "broken server omitted NBD_INFO_EXPORT");
    393                return -1;
    394            }
    395            trace_nbd_opt_info_go_success(nbd_opt_lookup(opt));
    396            return 1;
    397        }
    398        if (reply.type != NBD_REP_INFO) {
    399            error_setg(errp, "unexpected reply type %u (%s), expected %u (%s)",
    400                       reply.type, nbd_rep_lookup(reply.type),
    401                       NBD_REP_INFO, nbd_rep_lookup(NBD_REP_INFO));
    402            nbd_send_opt_abort(ioc);
    403            return -1;
    404        }
    405        if (len < sizeof(type)) {
    406            error_setg(errp, "NBD_REP_INFO length %" PRIu32 " is too short",
    407                       len);
    408            nbd_send_opt_abort(ioc);
    409            return -1;
    410        }
    411        if (nbd_read16(ioc, &type, "info type", errp) < 0) {
    412            nbd_send_opt_abort(ioc);
    413            return -1;
    414        }
    415        len -= sizeof(type);
    416        switch (type) {
    417        case NBD_INFO_EXPORT:
    418            if (len != sizeof(info->size) + sizeof(info->flags)) {
    419                error_setg(errp, "remaining export info len %" PRIu32
    420                           " is unexpected size", len);
    421                nbd_send_opt_abort(ioc);
    422                return -1;
    423            }
    424            if (nbd_read64(ioc, &info->size, "info size", errp) < 0) {
    425                nbd_send_opt_abort(ioc);
    426                return -1;
    427            }
    428            if (nbd_read16(ioc, &info->flags, "info flags", errp) < 0) {
    429                nbd_send_opt_abort(ioc);
    430                return -1;
    431            }
    432            if (info->min_block &&
    433                !QEMU_IS_ALIGNED(info->size, info->min_block)) {
    434                error_setg(errp, "export size %" PRIu64 " is not multiple of "
    435                           "minimum block size %" PRIu32, info->size,
    436                           info->min_block);
    437                nbd_send_opt_abort(ioc);
    438                return -1;
    439            }
    440            trace_nbd_receive_negotiate_size_flags(info->size, info->flags);
    441            break;
    442
    443        case NBD_INFO_BLOCK_SIZE:
    444            if (len != sizeof(info->min_block) * 3) {
    445                error_setg(errp, "remaining export info len %" PRIu32
    446                           " is unexpected size", len);
    447                nbd_send_opt_abort(ioc);
    448                return -1;
    449            }
    450            if (nbd_read32(ioc, &info->min_block, "info minimum block size",
    451                           errp) < 0) {
    452                nbd_send_opt_abort(ioc);
    453                return -1;
    454            }
    455            if (!is_power_of_2(info->min_block)) {
    456                error_setg(errp, "server minimum block size %" PRIu32
    457                           " is not a power of two", info->min_block);
    458                nbd_send_opt_abort(ioc);
    459                return -1;
    460            }
    461            if (nbd_read32(ioc, &info->opt_block, "info preferred block size",
    462                           errp) < 0)
    463            {
    464                nbd_send_opt_abort(ioc);
    465                return -1;
    466            }
    467            if (!is_power_of_2(info->opt_block) ||
    468                info->opt_block < info->min_block) {
    469                error_setg(errp, "server preferred block size %" PRIu32
    470                           " is not valid", info->opt_block);
    471                nbd_send_opt_abort(ioc);
    472                return -1;
    473            }
    474            if (nbd_read32(ioc, &info->max_block, "info maximum block size",
    475                           errp) < 0)
    476            {
    477                nbd_send_opt_abort(ioc);
    478                return -1;
    479            }
    480            if (info->max_block < info->min_block) {
    481                error_setg(errp, "server maximum block size %" PRIu32
    482                           " is not valid", info->max_block);
    483                nbd_send_opt_abort(ioc);
    484                return -1;
    485            }
    486            trace_nbd_opt_info_block_size(info->min_block, info->opt_block,
    487                                          info->max_block);
    488            break;
    489
    490        default:
    491            /*
    492             * Not worth the bother to check if NBD_INFO_NAME or
    493             * NBD_INFO_DESCRIPTION exceed NBD_MAX_STRING_SIZE.
    494             */
    495            trace_nbd_opt_info_unknown(type, nbd_info_lookup(type));
    496            if (nbd_drop(ioc, len, errp) < 0) {
    497                error_prepend(errp, "Failed to read info payload: ");
    498                nbd_send_opt_abort(ioc);
    499                return -1;
    500            }
    501            break;
    502        }
    503    }
    504}
    505
    506/* Return -1 on failure, 0 if wantname is an available export. */
    507static int nbd_receive_query_exports(QIOChannel *ioc,
    508                                     const char *wantname,
    509                                     Error **errp)
    510{
    511    bool list_empty = true;
    512    bool found_export = false;
    513
    514    trace_nbd_receive_query_exports_start(wantname);
    515    if (nbd_send_option_request(ioc, NBD_OPT_LIST, 0, NULL, errp) < 0) {
    516        return -1;
    517    }
    518
    519    while (1) {
    520        char *name;
    521        int ret = nbd_receive_list(ioc, &name, NULL, errp);
    522
    523        if (ret < 0) {
    524            /* Server gave unexpected reply */
    525            return -1;
    526        } else if (ret == 0) {
    527            /* Done iterating. */
    528            if (list_empty) {
    529                /*
    530                 * We don't have enough context to tell a server that
    531                 * sent an empty list apart from a server that does
    532                 * not support the list command; but as this function
    533                 * is just used to trigger a nicer error message
    534                 * before trying NBD_OPT_EXPORT_NAME, assume the
    535                 * export is available.
    536                 */
    537                return 0;
    538            } else if (!found_export) {
    539                error_setg(errp, "No export with name '%s' available",
    540                           wantname);
    541                nbd_send_opt_abort(ioc);
    542                return -1;
    543            }
    544            trace_nbd_receive_query_exports_success(wantname);
    545            return 0;
    546        }
    547        list_empty = false;
    548        if (!strcmp(name, wantname)) {
    549            found_export = true;
    550        }
    551        g_free(name);
    552    }
    553}
    554
    555/*
    556 * nbd_request_simple_option: Send an option request, and parse the reply.
    557 * @strict controls whether ERR_UNSUP or all errors produce 0 status.
    558 * return 1 for successful negotiation,
    559 *        0 if operation is unsupported,
    560 *        -1 with errp set for any other error
    561 */
    562static int nbd_request_simple_option(QIOChannel *ioc, int opt, bool strict,
    563                                     Error **errp)
    564{
    565    NBDOptionReply reply;
    566    int error;
    567
    568    if (nbd_send_option_request(ioc, opt, 0, NULL, errp) < 0) {
    569        return -1;
    570    }
    571
    572    if (nbd_receive_option_reply(ioc, opt, &reply, errp) < 0) {
    573        return -1;
    574    }
    575    error = nbd_handle_reply_err(ioc, &reply, strict, errp);
    576    if (error <= 0) {
    577        return error;
    578    }
    579
    580    if (reply.type != NBD_REP_ACK) {
    581        error_setg(errp, "Server answered option %d (%s) with unexpected "
    582                   "reply %" PRIu32 " (%s)", opt, nbd_opt_lookup(opt),
    583                   reply.type, nbd_rep_lookup(reply.type));
    584        nbd_send_opt_abort(ioc);
    585        return -1;
    586    }
    587
    588    if (reply.length != 0) {
    589        error_setg(errp, "Option %d ('%s') response length is %" PRIu32
    590                   " (it should be zero)", opt, nbd_opt_lookup(opt),
    591                   reply.length);
    592        nbd_send_opt_abort(ioc);
    593        return -1;
    594    }
    595
    596    return 1;
    597}
    598
    599static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
    600                                        QCryptoTLSCreds *tlscreds,
    601                                        const char *hostname, Error **errp)
    602{
    603    int ret;
    604    QIOChannelTLS *tioc;
    605    struct NBDTLSHandshakeData data = { 0 };
    606
    607    ret = nbd_request_simple_option(ioc, NBD_OPT_STARTTLS, true, errp);
    608    if (ret <= 0) {
    609        if (ret == 0) {
    610            error_setg(errp, "Server don't support STARTTLS option");
    611            nbd_send_opt_abort(ioc);
    612        }
    613        return NULL;
    614    }
    615
    616    trace_nbd_receive_starttls_new_client();
    617    tioc = qio_channel_tls_new_client(ioc, tlscreds, hostname, errp);
    618    if (!tioc) {
    619        return NULL;
    620    }
    621    qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-client-tls");
    622    data.loop = g_main_loop_new(g_main_context_default(), FALSE);
    623    trace_nbd_receive_starttls_tls_handshake();
    624    qio_channel_tls_handshake(tioc,
    625                              nbd_tls_handshake,
    626                              &data,
    627                              NULL,
    628                              NULL);
    629
    630    if (!data.complete) {
    631        g_main_loop_run(data.loop);
    632    }
    633    g_main_loop_unref(data.loop);
    634    if (data.error) {
    635        error_propagate(errp, data.error);
    636        object_unref(OBJECT(tioc));
    637        return NULL;
    638    }
    639
    640    return QIO_CHANNEL(tioc);
    641}
    642
    643/*
    644 * nbd_send_meta_query:
    645 * Send 0 or 1 set/list meta context queries.
    646 * Return 0 on success, -1 with errp set for any error
    647 */
    648static int nbd_send_meta_query(QIOChannel *ioc, uint32_t opt,
    649                               const char *export, const char *query,
    650                               Error **errp)
    651{
    652    int ret;
    653    uint32_t export_len = strlen(export);
    654    uint32_t queries = !!query;
    655    uint32_t query_len = 0;
    656    uint32_t data_len;
    657    char *data;
    658    char *p;
    659
    660    data_len = sizeof(export_len) + export_len + sizeof(queries);
    661    assert(export_len <= NBD_MAX_STRING_SIZE);
    662    if (query) {
    663        query_len = strlen(query);
    664        data_len += sizeof(query_len) + query_len;
    665        assert(query_len <= NBD_MAX_STRING_SIZE);
    666    } else {
    667        assert(opt == NBD_OPT_LIST_META_CONTEXT);
    668    }
    669    p = data = g_malloc(data_len);
    670
    671    trace_nbd_opt_meta_request(nbd_opt_lookup(opt), query ?: "(all)", export);
    672    stl_be_p(p, export_len);
    673    memcpy(p += sizeof(export_len), export, export_len);
    674    stl_be_p(p += export_len, queries);
    675    if (query) {
    676        stl_be_p(p += sizeof(queries), query_len);
    677        memcpy(p += sizeof(query_len), query, query_len);
    678    }
    679
    680    ret = nbd_send_option_request(ioc, opt, data_len, data, errp);
    681    g_free(data);
    682    return ret;
    683}
    684
    685/*
    686 * nbd_receive_one_meta_context:
    687 * Called in a loop to receive and trace one set/list meta context reply.
    688 * Pass non-NULL @name or @id to collect results back to the caller, which
    689 * must eventually call g_free().
    690 * return 1 if name is set and iteration must continue,
    691 *        0 if iteration is complete (including if option is unsupported),
    692 *        -1 with errp set for any error
    693 */
    694static int nbd_receive_one_meta_context(QIOChannel *ioc,
    695                                        uint32_t opt,
    696                                        char **name,
    697                                        uint32_t *id,
    698                                        Error **errp)
    699{
    700    int ret;
    701    NBDOptionReply reply;
    702    char *local_name = NULL;
    703    uint32_t local_id;
    704
    705    if (nbd_receive_option_reply(ioc, opt, &reply, errp) < 0) {
    706        return -1;
    707    }
    708
    709    ret = nbd_handle_reply_err(ioc, &reply, false, errp);
    710    if (ret <= 0) {
    711        return ret;
    712    }
    713
    714    if (reply.type == NBD_REP_ACK) {
    715        if (reply.length != 0) {
    716            error_setg(errp, "Unexpected length to ACK response");
    717            nbd_send_opt_abort(ioc);
    718            return -1;
    719        }
    720        return 0;
    721    } else if (reply.type != NBD_REP_META_CONTEXT) {
    722        error_setg(errp, "Unexpected reply type %u (%s), expected %u (%s)",
    723                   reply.type, nbd_rep_lookup(reply.type),
    724                   NBD_REP_META_CONTEXT, nbd_rep_lookup(NBD_REP_META_CONTEXT));
    725        nbd_send_opt_abort(ioc);
    726        return -1;
    727    }
    728
    729    if (reply.length <= sizeof(local_id) ||
    730        reply.length > NBD_MAX_BUFFER_SIZE) {
    731        error_setg(errp, "Failed to negotiate meta context, server "
    732                   "answered with unexpected length %" PRIu32,
    733                   reply.length);
    734        nbd_send_opt_abort(ioc);
    735        return -1;
    736    }
    737
    738    if (nbd_read32(ioc, &local_id, "context id", errp) < 0) {
    739        return -1;
    740    }
    741
    742    reply.length -= sizeof(local_id);
    743    local_name = g_malloc(reply.length + 1);
    744    if (nbd_read(ioc, local_name, reply.length, "context name", errp) < 0) {
    745        g_free(local_name);
    746        return -1;
    747    }
    748    local_name[reply.length] = '\0';
    749    trace_nbd_opt_meta_reply(nbd_opt_lookup(opt), local_name, local_id);
    750
    751    if (name) {
    752        *name = local_name;
    753    } else {
    754        g_free(local_name);
    755    }
    756    if (id) {
    757        *id = local_id;
    758    }
    759    return 1;
    760}
    761
    762/*
    763 * nbd_negotiate_simple_meta_context:
    764 * Request the server to set the meta context for export @info->name
    765 * using @info->x_dirty_bitmap with a fallback to "base:allocation",
    766 * setting @info->context_id to the resulting id. Fail if the server
    767 * responds with more than one context or with a context different
    768 * than the query.
    769 * return 1 for successful negotiation,
    770 *        0 if operation is unsupported,
    771 *        -1 with errp set for any other error
    772 */
    773static int nbd_negotiate_simple_meta_context(QIOChannel *ioc,
    774                                             NBDExportInfo *info,
    775                                             Error **errp)
    776{
    777    /*
    778     * TODO: Removing the x_dirty_bitmap hack will mean refactoring
    779     * this function to request and store ids for multiple contexts
    780     * (both base:allocation and a dirty bitmap), at which point this
    781     * function should lose the term _simple.
    782     */
    783    int ret;
    784    const char *context = info->x_dirty_bitmap ?: "base:allocation";
    785    bool received = false;
    786    char *name = NULL;
    787
    788    if (nbd_send_meta_query(ioc, NBD_OPT_SET_META_CONTEXT,
    789                            info->name, context, errp) < 0) {
    790        return -1;
    791    }
    792
    793    ret = nbd_receive_one_meta_context(ioc, NBD_OPT_SET_META_CONTEXT,
    794                                       &name, &info->context_id, errp);
    795    if (ret < 0) {
    796        return -1;
    797    }
    798    if (ret == 1) {
    799        if (strcmp(context, name)) {
    800            error_setg(errp, "Failed to negotiate meta context '%s', server "
    801                       "answered with different context '%s'", context,
    802                       name);
    803            g_free(name);
    804            nbd_send_opt_abort(ioc);
    805            return -1;
    806        }
    807        g_free(name);
    808        received = true;
    809
    810        ret = nbd_receive_one_meta_context(ioc, NBD_OPT_SET_META_CONTEXT,
    811                                           NULL, NULL, errp);
    812        if (ret < 0) {
    813            return -1;
    814        }
    815    }
    816    if (ret != 0) {
    817        error_setg(errp, "Server answered with more than one context");
    818        nbd_send_opt_abort(ioc);
    819        return -1;
    820    }
    821    return received;
    822}
    823
    824/*
    825 * nbd_list_meta_contexts:
    826 * Request the server to list all meta contexts for export @info->name.
    827 * return 0 if list is complete (even if empty),
    828 *        -1 with errp set for any error
    829 */
    830static int nbd_list_meta_contexts(QIOChannel *ioc,
    831                                  NBDExportInfo *info,
    832                                  Error **errp)
    833{
    834    int ret;
    835    int seen_any = false;
    836    int seen_qemu = false;
    837
    838    if (nbd_send_meta_query(ioc, NBD_OPT_LIST_META_CONTEXT,
    839                            info->name, NULL, errp) < 0) {
    840        return -1;
    841    }
    842
    843    while (1) {
    844        char *context;
    845
    846        ret = nbd_receive_one_meta_context(ioc, NBD_OPT_LIST_META_CONTEXT,
    847                                           &context, NULL, errp);
    848        if (ret == 0 && seen_any && !seen_qemu) {
    849            /*
    850             * Work around qemu 3.0 bug: the server forgot to send
    851             * "qemu:" replies to 0 queries. If we saw at least one
    852             * reply (probably base:allocation), but none of them were
    853             * qemu:, then run a more specific query to make sure.
    854             */
    855            seen_qemu = true;
    856            if (nbd_send_meta_query(ioc, NBD_OPT_LIST_META_CONTEXT,
    857                                    info->name, "qemu:", errp) < 0) {
    858                return -1;
    859            }
    860            continue;
    861        }
    862        if (ret <= 0) {
    863            return ret;
    864        }
    865        seen_any = true;
    866        seen_qemu |= strstart(context, "qemu:", NULL);
    867        info->contexts = g_renew(char *, info->contexts, ++info->n_contexts);
    868        info->contexts[info->n_contexts - 1] = context;
    869    }
    870}
    871
    872/*
    873 * nbd_start_negotiate:
    874 * Start the handshake to the server.  After a positive return, the server
    875 * is ready to accept additional NBD_OPT requests.
    876 * Returns: negative errno: failure talking to server
    877 *          0: server is oldstyle, must call nbd_negotiate_finish_oldstyle
    878 *          1: server is newstyle, but can only accept EXPORT_NAME
    879 *          2: server is newstyle, but lacks structured replies
    880 *          3: server is newstyle and set up for structured replies
    881 */
    882static int nbd_start_negotiate(AioContext *aio_context, QIOChannel *ioc,
    883                               QCryptoTLSCreds *tlscreds,
    884                               const char *hostname, QIOChannel **outioc,
    885                               bool structured_reply, bool *zeroes,
    886                               Error **errp)
    887{
    888    ERRP_GUARD();
    889    uint64_t magic;
    890
    891    trace_nbd_start_negotiate(tlscreds, hostname ? hostname : "<null>");
    892
    893    if (zeroes) {
    894        *zeroes = true;
    895    }
    896    if (outioc) {
    897        *outioc = NULL;
    898    }
    899    if (tlscreds && !outioc) {
    900        error_setg(errp, "Output I/O channel required for TLS");
    901        return -EINVAL;
    902    }
    903
    904    if (nbd_read64(ioc, &magic, "initial magic", errp) < 0) {
    905        return -EINVAL;
    906    }
    907    trace_nbd_receive_negotiate_magic(magic);
    908
    909    if (magic != NBD_INIT_MAGIC) {
    910        error_setg(errp, "Bad initial magic received: 0x%" PRIx64, magic);
    911        return -EINVAL;
    912    }
    913
    914    if (nbd_read64(ioc, &magic, "server magic", errp) < 0) {
    915        return -EINVAL;
    916    }
    917    trace_nbd_receive_negotiate_magic(magic);
    918
    919    if (magic == NBD_OPTS_MAGIC) {
    920        uint32_t clientflags = 0;
    921        uint16_t globalflags;
    922        bool fixedNewStyle = false;
    923
    924        if (nbd_read16(ioc, &globalflags, "server flags", errp) < 0) {
    925            return -EINVAL;
    926        }
    927        trace_nbd_receive_negotiate_server_flags(globalflags);
    928        if (globalflags & NBD_FLAG_FIXED_NEWSTYLE) {
    929            fixedNewStyle = true;
    930            clientflags |= NBD_FLAG_C_FIXED_NEWSTYLE;
    931        }
    932        if (globalflags & NBD_FLAG_NO_ZEROES) {
    933            if (zeroes) {
    934                *zeroes = false;
    935            }
    936            clientflags |= NBD_FLAG_C_NO_ZEROES;
    937        }
    938        /* client requested flags */
    939        clientflags = cpu_to_be32(clientflags);
    940        if (nbd_write(ioc, &clientflags, sizeof(clientflags), errp) < 0) {
    941            error_prepend(errp, "Failed to send clientflags field: ");
    942            return -EINVAL;
    943        }
    944        if (tlscreds) {
    945            if (fixedNewStyle) {
    946                *outioc = nbd_receive_starttls(ioc, tlscreds, hostname, errp);
    947                if (!*outioc) {
    948                    return -EINVAL;
    949                }
    950                ioc = *outioc;
    951                if (aio_context) {
    952                    qio_channel_set_blocking(ioc, false, NULL);
    953                    qio_channel_attach_aio_context(ioc, aio_context);
    954                }
    955            } else {
    956                error_setg(errp, "Server does not support STARTTLS");
    957                return -EINVAL;
    958            }
    959        }
    960        if (fixedNewStyle) {
    961            int result = 0;
    962
    963            if (structured_reply) {
    964                result = nbd_request_simple_option(ioc,
    965                                                   NBD_OPT_STRUCTURED_REPLY,
    966                                                   false, errp);
    967                if (result < 0) {
    968                    return -EINVAL;
    969                }
    970            }
    971            return 2 + result;
    972        } else {
    973            return 1;
    974        }
    975    } else if (magic == NBD_CLIENT_MAGIC) {
    976        if (tlscreds) {
    977            error_setg(errp, "Server does not support STARTTLS");
    978            return -EINVAL;
    979        }
    980        return 0;
    981    } else {
    982        error_setg(errp, "Bad server magic received: 0x%" PRIx64, magic);
    983        return -EINVAL;
    984    }
    985}
    986
    987/*
    988 * nbd_negotiate_finish_oldstyle:
    989 * Populate @info with the size and export flags from an oldstyle server,
    990 * but does not consume 124 bytes of reserved zero padding.
    991 * Returns 0 on success, -1 with @errp set on failure
    992 */
    993static int nbd_negotiate_finish_oldstyle(QIOChannel *ioc, NBDExportInfo *info,
    994                                         Error **errp)
    995{
    996    uint32_t oldflags;
    997
    998    if (nbd_read64(ioc, &info->size, "export length", errp) < 0) {
    999        return -EINVAL;
   1000    }
   1001
   1002    if (nbd_read32(ioc, &oldflags, "export flags", errp) < 0) {
   1003        return -EINVAL;
   1004    }
   1005    if (oldflags & ~0xffff) {
   1006        error_setg(errp, "Unexpected export flags %0x" PRIx32, oldflags);
   1007        return -EINVAL;
   1008    }
   1009    info->flags = oldflags;
   1010    return 0;
   1011}
   1012
   1013/*
   1014 * nbd_receive_negotiate:
   1015 * Connect to server, complete negotiation, and move into transmission phase.
   1016 * Returns: negative errno: failure talking to server
   1017 *          0: server is connected
   1018 */
   1019int nbd_receive_negotiate(AioContext *aio_context, QIOChannel *ioc,
   1020                          QCryptoTLSCreds *tlscreds,
   1021                          const char *hostname, QIOChannel **outioc,
   1022                          NBDExportInfo *info, Error **errp)
   1023{
   1024    ERRP_GUARD();
   1025    int result;
   1026    bool zeroes;
   1027    bool base_allocation = info->base_allocation;
   1028
   1029    assert(info->name && strlen(info->name) <= NBD_MAX_STRING_SIZE);
   1030    trace_nbd_receive_negotiate_name(info->name);
   1031
   1032    result = nbd_start_negotiate(aio_context, ioc, tlscreds, hostname, outioc,
   1033                                 info->structured_reply, &zeroes, errp);
   1034
   1035    info->structured_reply = false;
   1036    info->base_allocation = false;
   1037    if (tlscreds && *outioc) {
   1038        ioc = *outioc;
   1039    }
   1040
   1041    switch (result) {
   1042    case 3: /* newstyle, with structured replies */
   1043        info->structured_reply = true;
   1044        if (base_allocation) {
   1045            result = nbd_negotiate_simple_meta_context(ioc, info, errp);
   1046            if (result < 0) {
   1047                return -EINVAL;
   1048            }
   1049            info->base_allocation = result == 1;
   1050        }
   1051        /* fall through */
   1052    case 2: /* newstyle, try OPT_GO */
   1053        /* Try NBD_OPT_GO first - if it works, we are done (it
   1054         * also gives us a good message if the server requires
   1055         * TLS).  If it is not available, fall back to
   1056         * NBD_OPT_LIST for nicer error messages about a missing
   1057         * export, then use NBD_OPT_EXPORT_NAME.  */
   1058        result = nbd_opt_info_or_go(ioc, NBD_OPT_GO, info, errp);
   1059        if (result < 0) {
   1060            return -EINVAL;
   1061        }
   1062        if (result > 0) {
   1063            return 0;
   1064        }
   1065        /* Check our desired export is present in the
   1066         * server export list. Since NBD_OPT_EXPORT_NAME
   1067         * cannot return an error message, running this
   1068         * query gives us better error reporting if the
   1069         * export name is not available.
   1070         */
   1071        if (nbd_receive_query_exports(ioc, info->name, errp) < 0) {
   1072            return -EINVAL;
   1073        }
   1074        /* fall through */
   1075    case 1: /* newstyle, but limited to EXPORT_NAME */
   1076        /* write the export name request */
   1077        if (nbd_send_option_request(ioc, NBD_OPT_EXPORT_NAME, -1, info->name,
   1078                                    errp) < 0) {
   1079            return -EINVAL;
   1080        }
   1081
   1082        /* Read the response */
   1083        if (nbd_read64(ioc, &info->size, "export length", errp) < 0) {
   1084            return -EINVAL;
   1085        }
   1086
   1087        if (nbd_read16(ioc, &info->flags, "export flags", errp) < 0) {
   1088            return -EINVAL;
   1089        }
   1090        break;
   1091    case 0: /* oldstyle, parse length and flags */
   1092        if (*info->name) {
   1093            error_setg(errp, "Server does not support non-empty export names");
   1094            return -EINVAL;
   1095        }
   1096        if (nbd_negotiate_finish_oldstyle(ioc, info, errp) < 0) {
   1097            return -EINVAL;
   1098        }
   1099        break;
   1100    default:
   1101        return result;
   1102    }
   1103
   1104    trace_nbd_receive_negotiate_size_flags(info->size, info->flags);
   1105    if (zeroes && nbd_drop(ioc, 124, errp) < 0) {
   1106        error_prepend(errp, "Failed to read reserved block: ");
   1107        return -EINVAL;
   1108    }
   1109    return 0;
   1110}
   1111
   1112/* Clean up result of nbd_receive_export_list */
   1113void nbd_free_export_list(NBDExportInfo *info, int count)
   1114{
   1115    int i, j;
   1116
   1117    if (!info) {
   1118        return;
   1119    }
   1120
   1121    for (i = 0; i < count; i++) {
   1122        g_free(info[i].name);
   1123        g_free(info[i].description);
   1124        for (j = 0; j < info[i].n_contexts; j++) {
   1125            g_free(info[i].contexts[j]);
   1126        }
   1127        g_free(info[i].contexts);
   1128    }
   1129    g_free(info);
   1130}
   1131
   1132/*
   1133 * nbd_receive_export_list:
   1134 * Query details about a server's exports, then disconnect without
   1135 * going into transmission phase. Return a count of the exports listed
   1136 * in @info by the server, or -1 on error. Caller must free @info using
   1137 * nbd_free_export_list().
   1138 */
   1139int nbd_receive_export_list(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
   1140                            const char *hostname, NBDExportInfo **info,
   1141                            Error **errp)
   1142{
   1143    int result;
   1144    int count = 0;
   1145    int i;
   1146    int rc;
   1147    int ret = -1;
   1148    NBDExportInfo *array = NULL;
   1149    QIOChannel *sioc = NULL;
   1150
   1151    *info = NULL;
   1152    result = nbd_start_negotiate(NULL, ioc, tlscreds, hostname, &sioc, true,
   1153                                 NULL, errp);
   1154    if (tlscreds && sioc) {
   1155        ioc = sioc;
   1156    }
   1157
   1158    switch (result) {
   1159    case 2:
   1160    case 3:
   1161        /* newstyle - use NBD_OPT_LIST to populate array, then try
   1162         * NBD_OPT_INFO on each array member. If structured replies
   1163         * are enabled, also try NBD_OPT_LIST_META_CONTEXT. */
   1164        if (nbd_send_option_request(ioc, NBD_OPT_LIST, 0, NULL, errp) < 0) {
   1165            goto out;
   1166        }
   1167        while (1) {
   1168            char *name;
   1169            char *desc;
   1170
   1171            rc = nbd_receive_list(ioc, &name, &desc, errp);
   1172            if (rc < 0) {
   1173                goto out;
   1174            } else if (rc == 0) {
   1175                break;
   1176            }
   1177            array = g_renew(NBDExportInfo, array, ++count);
   1178            memset(&array[count - 1], 0, sizeof(*array));
   1179            array[count - 1].name = name;
   1180            array[count - 1].description = desc;
   1181            array[count - 1].structured_reply = result == 3;
   1182        }
   1183
   1184        for (i = 0; i < count; i++) {
   1185            array[i].request_sizes = true;
   1186            rc = nbd_opt_info_or_go(ioc, NBD_OPT_INFO, &array[i], errp);
   1187            if (rc < 0) {
   1188                goto out;
   1189            } else if (rc == 0) {
   1190                /*
   1191                 * Pointless to try rest of loop. If OPT_INFO doesn't work,
   1192                 * it's unlikely that meta contexts work either
   1193                 */
   1194                break;
   1195            }
   1196
   1197            if (result == 3 &&
   1198                nbd_list_meta_contexts(ioc, &array[i], errp) < 0) {
   1199                goto out;
   1200            }
   1201        }
   1202
   1203        /* Send NBD_OPT_ABORT as a courtesy before hanging up */
   1204        nbd_send_opt_abort(ioc);
   1205        break;
   1206    case 1: /* newstyle, but limited to EXPORT_NAME */
   1207        error_setg(errp, "Server does not support export lists");
   1208        /* We can't even send NBD_OPT_ABORT, so merely hang up */
   1209        goto out;
   1210    case 0: /* oldstyle, parse length and flags */
   1211        array = g_new0(NBDExportInfo, 1);
   1212        array->name = g_strdup("");
   1213        count = 1;
   1214
   1215        if (nbd_negotiate_finish_oldstyle(ioc, array, errp) < 0) {
   1216            goto out;
   1217        }
   1218
   1219        /* Send NBD_CMD_DISC as a courtesy to the server, but ignore all
   1220         * errors now that we have the information we wanted. */
   1221        if (nbd_drop(ioc, 124, NULL) == 0) {
   1222            NBDRequest request = { .type = NBD_CMD_DISC };
   1223
   1224            nbd_send_request(ioc, &request);
   1225        }
   1226        break;
   1227    default:
   1228        goto out;
   1229    }
   1230
   1231    *info = array;
   1232    array = NULL;
   1233    ret = count;
   1234
   1235 out:
   1236    qio_channel_shutdown(ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
   1237    qio_channel_close(ioc, NULL);
   1238    object_unref(OBJECT(sioc));
   1239    nbd_free_export_list(array, count);
   1240    return ret;
   1241}
   1242
   1243#ifdef __linux__
   1244int nbd_init(int fd, QIOChannelSocket *sioc, NBDExportInfo *info,
   1245             Error **errp)
   1246{
   1247    unsigned long sector_size = MAX(BDRV_SECTOR_SIZE, info->min_block);
   1248    unsigned long sectors = info->size / sector_size;
   1249
   1250    /* FIXME: Once the kernel module is patched to honor block sizes,
   1251     * and to advertise that fact to user space, we should update the
   1252     * hand-off to the kernel to use any block sizes we learned. */
   1253    assert(!info->request_sizes);
   1254    if (info->size / sector_size != sectors) {
   1255        error_setg(errp, "Export size %" PRIu64 " too large for 32-bit kernel",
   1256                   info->size);
   1257        return -E2BIG;
   1258    }
   1259
   1260    trace_nbd_init_set_socket();
   1261
   1262    if (ioctl(fd, NBD_SET_SOCK, (unsigned long) sioc->fd) < 0) {
   1263        int serrno = errno;
   1264        error_setg(errp, "Failed to set NBD socket");
   1265        return -serrno;
   1266    }
   1267
   1268    trace_nbd_init_set_block_size(sector_size);
   1269
   1270    if (ioctl(fd, NBD_SET_BLKSIZE, sector_size) < 0) {
   1271        int serrno = errno;
   1272        error_setg(errp, "Failed setting NBD block size");
   1273        return -serrno;
   1274    }
   1275
   1276    trace_nbd_init_set_size(sectors);
   1277    if (info->size % sector_size) {
   1278        trace_nbd_init_trailing_bytes(info->size % sector_size);
   1279    }
   1280
   1281    if (ioctl(fd, NBD_SET_SIZE_BLOCKS, sectors) < 0) {
   1282        int serrno = errno;
   1283        error_setg(errp, "Failed setting size (in blocks)");
   1284        return -serrno;
   1285    }
   1286
   1287    if (ioctl(fd, NBD_SET_FLAGS, (unsigned long) info->flags) < 0) {
   1288        if (errno == ENOTTY) {
   1289            int read_only = (info->flags & NBD_FLAG_READ_ONLY) != 0;
   1290            trace_nbd_init_set_readonly();
   1291
   1292            if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) {
   1293                int serrno = errno;
   1294                error_setg(errp, "Failed setting read-only attribute");
   1295                return -serrno;
   1296            }
   1297        } else {
   1298            int serrno = errno;
   1299            error_setg(errp, "Failed setting flags");
   1300            return -serrno;
   1301        }
   1302    }
   1303
   1304    trace_nbd_init_finish();
   1305
   1306    return 0;
   1307}
   1308
   1309int nbd_client(int fd)
   1310{
   1311    int ret;
   1312    int serrno;
   1313
   1314    trace_nbd_client_loop();
   1315
   1316    ret = ioctl(fd, NBD_DO_IT);
   1317    if (ret < 0 && errno == EPIPE) {
   1318        /* NBD_DO_IT normally returns EPIPE when someone has disconnected
   1319         * the socket via NBD_DISCONNECT.  We do not want to return 1 in
   1320         * that case.
   1321         */
   1322        ret = 0;
   1323    }
   1324    serrno = errno;
   1325
   1326    trace_nbd_client_loop_ret(ret, strerror(serrno));
   1327
   1328    trace_nbd_client_clear_queue();
   1329    ioctl(fd, NBD_CLEAR_QUE);
   1330
   1331    trace_nbd_client_clear_socket();
   1332    ioctl(fd, NBD_CLEAR_SOCK);
   1333
   1334    errno = serrno;
   1335    return ret;
   1336}
   1337
   1338int nbd_disconnect(int fd)
   1339{
   1340    ioctl(fd, NBD_CLEAR_QUE);
   1341    ioctl(fd, NBD_DISCONNECT);
   1342    ioctl(fd, NBD_CLEAR_SOCK);
   1343    return 0;
   1344}
   1345
   1346#endif /* __linux__ */
   1347
   1348int nbd_send_request(QIOChannel *ioc, NBDRequest *request)
   1349{
   1350    uint8_t buf[NBD_REQUEST_SIZE];
   1351
   1352    trace_nbd_send_request(request->from, request->len, request->handle,
   1353                           request->flags, request->type,
   1354                           nbd_cmd_lookup(request->type));
   1355
   1356    stl_be_p(buf, NBD_REQUEST_MAGIC);
   1357    stw_be_p(buf + 4, request->flags);
   1358    stw_be_p(buf + 6, request->type);
   1359    stq_be_p(buf + 8, request->handle);
   1360    stq_be_p(buf + 16, request->from);
   1361    stl_be_p(buf + 24, request->len);
   1362
   1363    return nbd_write(ioc, buf, sizeof(buf), NULL);
   1364}
   1365
   1366/* nbd_receive_simple_reply
   1367 * Read simple reply except magic field (which should be already read).
   1368 * Payload is not read (payload is possible for CMD_READ, but here we even
   1369 * don't know whether it take place or not).
   1370 */
   1371static int nbd_receive_simple_reply(QIOChannel *ioc, NBDSimpleReply *reply,
   1372                                    Error **errp)
   1373{
   1374    int ret;
   1375
   1376    assert(reply->magic == NBD_SIMPLE_REPLY_MAGIC);
   1377
   1378    ret = nbd_read(ioc, (uint8_t *)reply + sizeof(reply->magic),
   1379                   sizeof(*reply) - sizeof(reply->magic), "reply", errp);
   1380    if (ret < 0) {
   1381        return ret;
   1382    }
   1383
   1384    reply->error = be32_to_cpu(reply->error);
   1385    reply->handle = be64_to_cpu(reply->handle);
   1386
   1387    return 0;
   1388}
   1389
   1390/* nbd_receive_structured_reply_chunk
   1391 * Read structured reply chunk except magic field (which should be already
   1392 * read).
   1393 * Payload is not read.
   1394 */
   1395static int nbd_receive_structured_reply_chunk(QIOChannel *ioc,
   1396                                              NBDStructuredReplyChunk *chunk,
   1397                                              Error **errp)
   1398{
   1399    int ret;
   1400
   1401    assert(chunk->magic == NBD_STRUCTURED_REPLY_MAGIC);
   1402
   1403    ret = nbd_read(ioc, (uint8_t *)chunk + sizeof(chunk->magic),
   1404                   sizeof(*chunk) - sizeof(chunk->magic), "structured chunk",
   1405                   errp);
   1406    if (ret < 0) {
   1407        return ret;
   1408    }
   1409
   1410    chunk->flags = be16_to_cpu(chunk->flags);
   1411    chunk->type = be16_to_cpu(chunk->type);
   1412    chunk->handle = be64_to_cpu(chunk->handle);
   1413    chunk->length = be32_to_cpu(chunk->length);
   1414
   1415    return 0;
   1416}
   1417
   1418/* nbd_read_eof
   1419 * Tries to read @size bytes from @ioc.
   1420 * Returns 1 on success
   1421 *         0 on eof, when no data was read (errp is not set)
   1422 *         negative errno on failure (errp is set)
   1423 */
   1424static inline int coroutine_fn
   1425nbd_read_eof(BlockDriverState *bs, QIOChannel *ioc, void *buffer, size_t size,
   1426             Error **errp)
   1427{
   1428    bool partial = false;
   1429
   1430    assert(size);
   1431    while (size > 0) {
   1432        struct iovec iov = { .iov_base = buffer, .iov_len = size };
   1433        ssize_t len;
   1434
   1435        len = qio_channel_readv(ioc, &iov, 1, errp);
   1436        if (len == QIO_CHANNEL_ERR_BLOCK) {
   1437            qio_channel_yield(ioc, G_IO_IN);
   1438            continue;
   1439        } else if (len < 0) {
   1440            return -EIO;
   1441        } else if (len == 0) {
   1442            if (partial) {
   1443                error_setg(errp,
   1444                           "Unexpected end-of-file before all bytes were read");
   1445                return -EIO;
   1446            } else {
   1447                return 0;
   1448            }
   1449        }
   1450
   1451        partial = true;
   1452        size -= len;
   1453        buffer = (uint8_t*) buffer + len;
   1454    }
   1455    return 1;
   1456}
   1457
   1458/* nbd_receive_reply
   1459 *
   1460 * Decreases bs->in_flight while waiting for a new reply. This yield is where
   1461 * we wait indefinitely and the coroutine must be able to be safely reentered
   1462 * for nbd_client_attach_aio_context().
   1463 *
   1464 * Returns 1 on success
   1465 *         0 on eof, when no data was read (errp is not set)
   1466 *         negative errno on failure (errp is set)
   1467 */
   1468int coroutine_fn nbd_receive_reply(BlockDriverState *bs, QIOChannel *ioc,
   1469                                   NBDReply *reply, Error **errp)
   1470{
   1471    int ret;
   1472    const char *type;
   1473
   1474    ret = nbd_read_eof(bs, ioc, &reply->magic, sizeof(reply->magic), errp);
   1475    if (ret <= 0) {
   1476        return ret;
   1477    }
   1478
   1479    reply->magic = be32_to_cpu(reply->magic);
   1480
   1481    switch (reply->magic) {
   1482    case NBD_SIMPLE_REPLY_MAGIC:
   1483        ret = nbd_receive_simple_reply(ioc, &reply->simple, errp);
   1484        if (ret < 0) {
   1485            break;
   1486        }
   1487        trace_nbd_receive_simple_reply(reply->simple.error,
   1488                                       nbd_err_lookup(reply->simple.error),
   1489                                       reply->handle);
   1490        break;
   1491    case NBD_STRUCTURED_REPLY_MAGIC:
   1492        ret = nbd_receive_structured_reply_chunk(ioc, &reply->structured, errp);
   1493        if (ret < 0) {
   1494            break;
   1495        }
   1496        type = nbd_reply_type_lookup(reply->structured.type);
   1497        trace_nbd_receive_structured_reply_chunk(reply->structured.flags,
   1498                                                 reply->structured.type, type,
   1499                                                 reply->structured.handle,
   1500                                                 reply->structured.length);
   1501        break;
   1502    default:
   1503        error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", reply->magic);
   1504        return -EINVAL;
   1505    }
   1506    if (ret < 0) {
   1507        return ret;
   1508    }
   1509
   1510    return 1;
   1511}
   1512