cachepc-qemu

Fork of AMDESE/qemu with changes for cachepc side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-qemu
Log | Files | Refs | Submodules | LICENSE | sfeed.txt

qemu-pr-helper.c (30655B)


      1/*
      2 * Privileged helper to handle persistent reservation commands for QEMU
      3 *
      4 * Copyright (C) 2017 Red Hat, Inc. <pbonzini@redhat.com>
      5 *
      6 * Author: Paolo Bonzini <pbonzini@redhat.com>
      7 *
      8 * This program is free software; you can redistribute it and/or modify
      9 * it under the terms of the GNU General Public License as published by
     10 * the Free Software Foundation; under version 2 of the License.
     11 *
     12 * This program is distributed in the hope that it will be useful,
     13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15 * GNU General Public License for more details.
     16 *
     17 * You should have received a copy of the GNU General Public License
     18 * along with this program; if not, see <http://www.gnu.org/licenses/>.
     19 */
     20
     21#include "qemu/osdep.h"
     22#include <getopt.h>
     23#include <sys/ioctl.h>
     24#include <linux/dm-ioctl.h>
     25#include <scsi/sg.h>
     26
     27#ifdef CONFIG_LIBCAP_NG
     28#include <cap-ng.h>
     29#endif
     30#include <pwd.h>
     31#include <grp.h>
     32
     33#ifdef CONFIG_MPATH
     34#include <libudev.h>
     35#include <mpath_cmd.h>
     36#include <mpath_persist.h>
     37#endif
     38
     39#include "qemu-common.h"
     40#include "qapi/error.h"
     41#include "qemu/cutils.h"
     42#include "qemu/main-loop.h"
     43#include "qemu/module.h"
     44#include "qemu/error-report.h"
     45#include "qemu/config-file.h"
     46#include "qemu/bswap.h"
     47#include "qemu/log.h"
     48#include "qemu/systemd.h"
     49#include "qapi/util.h"
     50#include "qapi/qmp/qstring.h"
     51#include "io/channel-socket.h"
     52#include "trace/control.h"
     53#include "qemu-version.h"
     54
     55#include "block/aio.h"
     56#include "block/thread-pool.h"
     57
     58#include "scsi/constants.h"
     59#include "scsi/utils.h"
     60#include "pr-helper.h"
     61
     62#define PR_OUT_FIXED_PARAM_SIZE 24 /* bytes of a PR OUT parameter list that precede the transport ID section */
     63
     64static char *socket_path; /* UNIX socket path; default set by compute_default_paths(), replaced by -k */
     65static char *pidfile; /* PID file path; default set by compute_default_paths(), replaced by -f */
     66static enum { RUNNING, TERMINATE, TERMINATING } state; /* termsig_handler() moves RUNNING to TERMINATE; client loops run only while RUNNING */
     67static QIOChannelSocket *server_ioc; /* listening socket, released by close_server_socket() */
     68static int server_watch; /* GLib source id removed by close_server_socket() */
     69static int num_active_sockets = 1; /* starts at 1 for the server socket; also raised while a request is being executed */
     70static int noisy; /* forwarded to the libmpathpersist calls */
     71static int verbose; /* forwarded to libmpathpersist; non-zero also makes client errors get reported */
     72
     73#ifdef CONFIG_LIBCAP_NG
     74static int uid = -1; /* user to switch to in drop_privileges(); -1 keeps the current one */
     75static int gid = -1; /* group to switch to in drop_privileges(); -1 keeps the current one */
     76#endif
     77
     78static void compute_default_paths(void)
     79{
     80    socket_path = qemu_get_local_state_pathname("run/qemu-pr-helper.sock");
     81    pidfile = qemu_get_local_state_pathname("run/qemu-pr-helper.pid");
     82}
     83
     84static void usage(const char *name)
     85{
     86    (printf) (
     87"Usage: %s [OPTIONS] FILE\n"
     88"Persistent Reservation helper program for QEMU\n"
     89"\n"
     90"  -h, --help                display this help and exit\n"
     91"  -V, --version             output version information and exit\n"
     92"\n"
     93"  -d, --daemon              run in the background\n"
     94"  -f, --pidfile=PATH        PID file when running as a daemon\n"
     95"                            (default '%s')\n"
     96"  -k, --socket=PATH         path to the unix socket\n"
     97"                            (default '%s')\n"
     98"  -T, --trace [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
     99"                            specify tracing options\n"
    100#ifdef CONFIG_LIBCAP_NG
    101"  -u, --user=USER           user to drop privileges to\n"
    102"  -g, --group=GROUP         group to drop privileges to\n"
    103#endif
    104"\n"
    105QEMU_HELP_BOTTOM "\n"
    106    , name, pidfile, socket_path);
    107}
    108
    109static void version(const char *name)
    110{
    111    printf(
    112"%s " QEMU_FULL_VERSION "\n"
    113"Written by Paolo Bonzini.\n"
    114"\n"
    115QEMU_COPYRIGHT "\n"
    116"This is free software; see the source for copying conditions.  There is NO\n"
    117"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"
    118    , name);
    119}
    120
    121/* SG_IO support */
    122
    123typedef struct PRHelperSGIOData {
    124    int fd;
    125    const uint8_t *cdb;
    126    uint8_t *sense;
    127    uint8_t *buf;
    128    int sz;              /* input/output */
    129    int dir;
    130} PRHelperSGIOData;
    131
    132static int do_sgio_worker(void *opaque)
    133{
    134    PRHelperSGIOData *data = opaque;
    135    struct sg_io_hdr io_hdr;
    136    int ret;
    137    int status;
    138    SCSISense sense_code;
    139
    140    memset(data->sense, 0, PR_HELPER_SENSE_SIZE);
    141    memset(&io_hdr, 0, sizeof(io_hdr));
    142    io_hdr.interface_id = 'S';
    143    io_hdr.cmd_len = PR_HELPER_CDB_SIZE;
    144    io_hdr.cmdp = (uint8_t *)data->cdb;
    145    io_hdr.sbp = data->sense;
    146    io_hdr.mx_sb_len = PR_HELPER_SENSE_SIZE;
    147    io_hdr.timeout = 1;
    148    io_hdr.dxfer_direction = data->dir;
    149    io_hdr.dxferp = (char *)data->buf;
    150    io_hdr.dxfer_len = data->sz;
    151    ret = ioctl(data->fd, SG_IO, &io_hdr);
    152
    153    if (ret < 0) {
    154        status = scsi_sense_from_errno(errno, &sense_code);
    155        if (status == CHECK_CONDITION) {
    156            scsi_build_sense(data->sense, sense_code);
    157        }
    158    } else if (io_hdr.host_status != SCSI_HOST_OK) {
    159        status = scsi_sense_from_host_status(io_hdr.host_status, &sense_code);
    160        if (status == CHECK_CONDITION) {
    161            scsi_build_sense(data->sense, sense_code);
    162        }
    163    } else if (io_hdr.driver_status & SG_ERR_DRIVER_TIMEOUT) {
    164        status = BUSY;
    165    } else {
    166        status = io_hdr.status;
    167    }
    168
    169    if (status == GOOD) {
    170        data->sz -= io_hdr.resid;
    171    } else {
    172        data->sz = 0;
    173    }
    174
    175    return status;
    176}
    177
    178static int do_sgio(int fd, const uint8_t *cdb, uint8_t *sense,
    179                    uint8_t *buf, int *sz, int dir)
    180{
    181    ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());
    182    int r;
    183
    184    PRHelperSGIOData data = {
    185        .fd = fd,
    186        .cdb = cdb,
    187        .sense = sense,
    188        .buf = buf,
    189        .sz = *sz,
    190        .dir = dir,
    191    };
    192
    193    r = thread_pool_submit_co(pool, do_sgio_worker, &data);
    194    *sz = data.sz;
    195    return r;
    196}
    197
    198/* Device mapper interface */
    199
    200#ifdef CONFIG_MPATH
    201#define CONTROL_PATH "/dev/mapper/control"
    202
    203typedef struct DMData {
    204    struct dm_ioctl dm;
    205    uint8_t data[1024];
    206} DMData;
    207
    208static int control_fd;
    209
    210static void *dm_ioctl(int ioc, struct dm_ioctl *dm)
    211{
    212    static DMData d;
    213    memcpy(&d.dm, dm, sizeof(d.dm));
    214    QEMU_BUILD_BUG_ON(sizeof(d.data) < sizeof(struct dm_target_spec));
    215
    216    d.dm.version[0] = DM_VERSION_MAJOR;
    217    d.dm.version[1] = 0;
    218    d.dm.version[2] = 0;
    219    d.dm.data_size = 1024;
    220    d.dm.data_start = offsetof(DMData, data);
    221    if (ioctl(control_fd, ioc, &d) < 0) {
    222        return NULL;
    223    }
    224    memcpy(dm, &d.dm, sizeof(d.dm));
    225    return &d.data;
    226}
    227
    228static void *dm_dev_ioctl(int fd, int ioc, struct dm_ioctl *dm)
    229{
    230    struct stat st;
    231    int r;
    232
    233    r = fstat(fd, &st);
    234    if (r < 0) {
    235        perror("fstat");
    236        exit(1);
    237    }
    238
    239    dm->dev = st.st_rdev;
    240    return dm_ioctl(ioc, dm);
    241}
    242
    243static void dm_init(void)
    244{
    245    control_fd = open(CONTROL_PATH, O_RDWR);
    246    if (control_fd < 0) {
    247        perror("Cannot open " CONTROL_PATH);
    248        exit(1);
    249    }
    250    struct dm_ioctl dm = { };
    251    if (!dm_ioctl(DM_VERSION, &dm)) {
    252        perror("ioctl");
    253        exit(1);
    254    }
    255    if (dm.version[0] != DM_VERSION_MAJOR) {
    256        fprintf(stderr, "Unsupported device mapper interface");
    257        exit(1);
    258    }
    259}
    260
    261/* Variables required by libmultipath and libmpathpersist.  */
    262QEMU_BUILD_BUG_ON(PR_HELPER_DATA_SIZE > MPATH_MAX_PARAM_LEN);
    263static struct config *multipath_conf;
    264unsigned mpath_mx_alloc_len = PR_HELPER_DATA_SIZE;
    265int logsink;
    266struct udev *udev;
    267
    268extern struct config *get_multipath_config(void);
    269struct config *get_multipath_config(void)
    270{
    271    return multipath_conf;
    272}
    273
    274extern void put_multipath_config(struct config *conf);
    275void put_multipath_config(struct config *conf)
    276{
    277}
    278
    279static void multipath_pr_init(void)
    280{
    281    udev = udev_new();
    282#ifdef CONFIG_MPATH_NEW_API
    283    multipath_conf = mpath_lib_init();
    284#else
    285    mpath_lib_init(udev);
    286#endif
    287}
    288
    289static int is_mpath(int fd)
    290{
    291    struct dm_ioctl dm = { .flags = DM_NOFLUSH_FLAG };
    292    struct dm_target_spec *tgt;
    293
    294    tgt = dm_dev_ioctl(fd, DM_TABLE_STATUS, &dm);
    295    if (!tgt) {
    296        if (errno == ENXIO) {
    297            return 0;
    298        }
    299        perror("ioctl");
    300        exit(EXIT_FAILURE);
    301    }
    302    return !strncmp(tgt->target_type, "multipath", DM_MAX_TYPE_NAME);
    303}
    304
    305static SCSISense mpath_generic_sense(int r)
    306{
    307    switch (r) {
    308    case MPATH_PR_SENSE_NOT_READY:
    309         return SENSE_CODE(NOT_READY);
    310    case MPATH_PR_SENSE_MEDIUM_ERROR:
    311         return SENSE_CODE(READ_ERROR);
    312    case MPATH_PR_SENSE_HARDWARE_ERROR:
    313         return SENSE_CODE(TARGET_FAILURE);
    314    case MPATH_PR_SENSE_ABORTED_COMMAND:
    315         return SENSE_CODE(IO_ERROR);
    316    default:
    317         abort();
    318    }
    319}
    320
    321static int mpath_reconstruct_sense(int fd, int r, uint8_t *sense)
    322{
    323    switch (r) {
    324    case MPATH_PR_SUCCESS:
    325        return GOOD;
    326    case MPATH_PR_SENSE_NOT_READY:
    327    case MPATH_PR_SENSE_MEDIUM_ERROR:
    328    case MPATH_PR_SENSE_HARDWARE_ERROR:
    329    case MPATH_PR_SENSE_ABORTED_COMMAND:
    330        {
    331            /* libmpathpersist ate the exact sense.  Try to find it by
    332             * issuing TEST UNIT READY.
    333             */
    334            uint8_t cdb[6] = { TEST_UNIT_READY };
    335            int sz = 0;
    336            int ret = do_sgio(fd, cdb, sense, NULL, &sz, SG_DXFER_NONE);
    337
    338            if (ret != GOOD) {
    339                return ret;
    340            }
    341            scsi_build_sense(sense, mpath_generic_sense(r));
    342            return CHECK_CONDITION;
    343        }
    344
    345    case MPATH_PR_SENSE_UNIT_ATTENTION:
    346        /* Congratulations libmpathpersist, you ruined the Unit Attention...
    347         * Return a heavyweight one.
    348         */
    349        scsi_build_sense(sense, SENSE_CODE(SCSI_BUS_RESET));
    350        return CHECK_CONDITION;
    351    case MPATH_PR_SENSE_INVALID_OP:
    352        /* Only one valid sense.  */
    353        scsi_build_sense(sense, SENSE_CODE(INVALID_OPCODE));
    354        return CHECK_CONDITION;
    355    case MPATH_PR_ILLEGAL_REQ:
    356        /* Guess.  */
    357        scsi_build_sense(sense, SENSE_CODE(INVALID_PARAM));
    358        return CHECK_CONDITION;
    359    case MPATH_PR_NO_SENSE:
    360        scsi_build_sense(sense, SENSE_CODE(NO_SENSE));
    361        return CHECK_CONDITION;
    362
    363    case MPATH_PR_RESERV_CONFLICT:
    364        return RESERVATION_CONFLICT;
    365
    366    case MPATH_PR_OTHER:
    367    default:
    368        scsi_build_sense(sense, SENSE_CODE(LUN_COMM_FAILURE));
    369        return CHECK_CONDITION;
    370    }
    371}
    372
    373static int multipath_pr_in(int fd, const uint8_t *cdb, uint8_t *sense,
    374                           uint8_t *data, int sz)
    375{
    376    int rq_servact = cdb[1];
    377    struct prin_resp resp;
    378    size_t written;
    379    int r;
    380
    381    switch (rq_servact) {
    382    case MPATH_PRIN_RKEY_SA:
    383    case MPATH_PRIN_RRES_SA:
    384    case MPATH_PRIN_RCAP_SA:
    385        break;
    386    case MPATH_PRIN_RFSTAT_SA:
    387        /* Nobody implements it anyway, so bail out. */
    388    default:
    389        /* Cannot parse any other output.  */
    390        scsi_build_sense(sense, SENSE_CODE(INVALID_FIELD));
    391        return CHECK_CONDITION;
    392    }
    393
    394    r = mpath_persistent_reserve_in(fd, rq_servact, &resp, noisy, verbose);
    395    if (r == MPATH_PR_SUCCESS) {
    396        switch (rq_servact) {
    397        case MPATH_PRIN_RKEY_SA:
    398        case MPATH_PRIN_RRES_SA: {
    399            struct prin_readdescr *out = &resp.prin_descriptor.prin_readkeys;
    400            assert(sz >= 8);
    401            written = MIN(out->additional_length + 8, sz);
    402            stl_be_p(&data[0], out->prgeneration);
    403            stl_be_p(&data[4], out->additional_length);
    404            memcpy(&data[8], out->key_list, written - 8);
    405            break;
    406        }
    407        case MPATH_PRIN_RCAP_SA: {
    408            struct prin_capdescr *out = &resp.prin_descriptor.prin_readcap;
    409            assert(sz >= 6);
    410            written = 6;
    411            stw_be_p(&data[0], out->length);
    412            data[2] = out->flags[0];
    413            data[3] = out->flags[1];
    414            stw_be_p(&data[4], out->pr_type_mask);
    415            break;
    416        }
    417        default:
    418            scsi_build_sense(sense, SENSE_CODE(INVALID_OPCODE));
    419            return CHECK_CONDITION;
    420        }
    421        assert(written <= sz);
    422        memset(data + written, 0, sz - written);
    423    }
    424
    425    return mpath_reconstruct_sense(fd, r, sense);
    426}
    427
    428static int multipath_pr_out(int fd, const uint8_t *cdb, uint8_t *sense,
    429                            const uint8_t *param, int sz)
    430{
    431    int rq_servact = cdb[1];
    432    int rq_scope = cdb[2] >> 4;
    433    int rq_type = cdb[2] & 0xf;
    434    g_autofree struct prout_param_descriptor *paramp = NULL;
    435    char transportids[PR_HELPER_DATA_SIZE];
    436    int r;
    437
    438    paramp = g_malloc0(sizeof(struct prout_param_descriptor)
    439                       + sizeof(struct transportid *) * MPATH_MX_TIDS);
    440
    441    if (sz < PR_OUT_FIXED_PARAM_SIZE) {
    442        /* Illegal request, Parameter list length error.  This isn't fatal;
    443         * we have read the data, send an error without closing the socket.
    444         */
    445        scsi_build_sense(sense, SENSE_CODE(INVALID_PARAM_LEN));
    446        return CHECK_CONDITION;
    447    }
    448
    449    switch (rq_servact) {
    450    case MPATH_PROUT_REG_SA:
    451    case MPATH_PROUT_RES_SA:
    452    case MPATH_PROUT_REL_SA:
    453    case MPATH_PROUT_CLEAR_SA:
    454    case MPATH_PROUT_PREE_SA:
    455    case MPATH_PROUT_PREE_AB_SA:
    456    case MPATH_PROUT_REG_IGN_SA:
    457        break;
    458    case MPATH_PROUT_REG_MOV_SA:
    459        /* Not supported by struct prout_param_descriptor.  */
    460    default:
    461        /* Cannot parse any other input.  */
    462        scsi_build_sense(sense, SENSE_CODE(INVALID_FIELD));
    463        return CHECK_CONDITION;
    464    }
    465
    466    /* Convert input data, especially transport IDs, to the structs
    467     * used by libmpathpersist (which, of course, will immediately
    468     * do the opposite).
    469     */
    470    memcpy(&paramp->key, &param[0], 8);
    471    memcpy(&paramp->sa_key, &param[8], 8);
    472    paramp->sa_flags = param[20];
    473    if (sz > PR_OUT_FIXED_PARAM_SIZE) {
    474        size_t transportid_len;
    475        int i, j;
    476        if (sz < PR_OUT_FIXED_PARAM_SIZE + 4) {
    477            scsi_build_sense(sense, SENSE_CODE(INVALID_PARAM_LEN));
    478            return CHECK_CONDITION;
    479        }
    480        transportid_len = ldl_be_p(&param[24]) + PR_OUT_FIXED_PARAM_SIZE + 4;
    481        if (transportid_len > sz) {
    482            scsi_build_sense(sense, SENSE_CODE(INVALID_PARAM));
    483            return CHECK_CONDITION;
    484        }
    485        for (i = PR_OUT_FIXED_PARAM_SIZE + 4, j = 0; i < transportid_len; ) {
    486            struct transportid *id = (struct transportid *) &transportids[j];
    487            int len;
    488
    489            id->format_code = param[i] & 0xc0;
    490            id->protocol_id = param[i] & 0x0f;
    491            switch (param[i] & 0xcf) {
    492            case 0:
    493                /* FC transport.  */
    494                if (i + 24 > transportid_len) {
    495                    goto illegal_req;
    496                }
    497                memcpy(id->n_port_name, &param[i + 8], 8);
    498                j += offsetof(struct transportid, n_port_name[8]);
    499                i += 24;
    500                break;
    501            case 5:
    502            case 0x45:
    503                /* iSCSI transport.  */
    504                len = lduw_be_p(&param[i + 2]);
    505                if (len > 252 || (len & 3) || i + len + 4 > transportid_len) {
    506                    /* For format code 00, the standard says the maximum is 223
    507                     * plus the NUL terminator.  For format code 01 there is no
    508                     * maximum length, but libmpathpersist ignores the first
    509                     * byte of id->iscsi_name so our maximum is 252.
    510                     */
    511                    goto illegal_req;
    512                }
    513                if (memchr(&param[i + 4], 0, len) == NULL) {
    514                    goto illegal_req;
    515                }
    516                memcpy(id->iscsi_name, &param[i + 2], len + 2);
    517                j += offsetof(struct transportid, iscsi_name[len + 2]);
    518                i += len + 4;
    519                break;
    520            case 6:
    521                /* SAS transport.  */
    522                if (i + 24 > transportid_len) {
    523                    goto illegal_req;
    524                }
    525                memcpy(id->sas_address, &param[i + 4], 8);
    526                j += offsetof(struct transportid, sas_address[8]);
    527                i += 24;
    528                break;
    529            default:
    530            illegal_req:
    531                scsi_build_sense(sense, SENSE_CODE(INVALID_PARAM));
    532                return CHECK_CONDITION;
    533            }
    534
    535            assert(paramp->num_transportid < MPATH_MX_TIDS);
    536            paramp->trnptid_list[paramp->num_transportid++] = id;
    537        }
    538    }
    539
    540    r = mpath_persistent_reserve_out(fd, rq_servact, rq_scope, rq_type,
    541                                     paramp, noisy, verbose);
    542    return mpath_reconstruct_sense(fd, r, sense);
    543}
    544#endif
    545
    546static int do_pr_in(int fd, const uint8_t *cdb, uint8_t *sense,
    547                    uint8_t *data, int *resp_sz)
    548{
    549#ifdef CONFIG_MPATH
    550    if (is_mpath(fd)) {
    551        /* multipath_pr_in fills the whole input buffer.  */
    552        int r = multipath_pr_in(fd, cdb, sense, data, *resp_sz);
    553        if (r != GOOD) {
    554            *resp_sz = 0;
    555        }
    556        return r;
    557    }
    558#endif
    559
    560    return do_sgio(fd, cdb, sense, data, resp_sz,
    561                   SG_DXFER_FROM_DEV);
    562}
    563
    564static int do_pr_out(int fd, const uint8_t *cdb, uint8_t *sense,
    565                     const uint8_t *param, int sz)
    566{
    567    int resp_sz;
    568
    569    if ((fcntl(fd, F_GETFL) & O_ACCMODE) == O_RDONLY) {
    570        scsi_build_sense(sense, SENSE_CODE(INVALID_OPCODE));
    571        return CHECK_CONDITION;
    572    }
    573
    574#ifdef CONFIG_MPATH
    575    if (is_mpath(fd)) {
    576        return multipath_pr_out(fd, cdb, sense, param, sz);
    577    }
    578#endif
    579
    580    resp_sz = sz;
    581    return do_sgio(fd, cdb, sense, (uint8_t *)param, &resp_sz,
    582                   SG_DXFER_TO_DEV);
    583}
    584
    585/* Client */
    586
    587typedef struct PRHelperClient { /* state of one connected client */
    588    QIOChannelSocket *ioc; /* the client's connection */
    589    Coroutine *co; /* coroutine running prh_co_entry() for this client */
    590    int fd; /* descriptor received with the current request, -1 if none */
    591    uint8_t data[PR_HELPER_DATA_SIZE]; /* PR OUT parameter list or PR IN response */
    592} PRHelperClient;
    593
    594typedef struct PRHelperRequest { /* one request as filled in by prh_read_request() */
    595    int fd; /* device descriptor taken over from the client; closed by prh_co_entry() */
    596    size_t sz; /* transfer length computed from the CDB */
    597    uint8_t cdb[PR_HELPER_CDB_SIZE]; /* the command as received */
    598} PRHelperRequest;
    599
    600static int coroutine_fn prh_read(PRHelperClient *client, void *buf, int sz,
    601                                 Error **errp)
    602{
    603    int ret = 0;
    604
    605    while (sz > 0) {
    606        int *fds = NULL;
    607        size_t nfds = 0;
    608        int i;
    609        struct iovec iov;
    610        ssize_t n_read;
    611
    612        iov.iov_base = buf;
    613        iov.iov_len = sz;
    614        n_read = qio_channel_readv_full(QIO_CHANNEL(client->ioc), &iov, 1,
    615                                        &fds, &nfds, errp);
    616
    617        if (n_read == QIO_CHANNEL_ERR_BLOCK) {
    618            qio_channel_yield(QIO_CHANNEL(client->ioc), G_IO_IN);
    619            continue;
    620        }
    621        if (n_read <= 0) {
    622            ret = n_read ? n_read : -1;
    623            goto err;
    624        }
    625
    626        /* Stash one file descriptor per request.  */
    627        if (nfds) {
    628            bool too_many = false;
    629            for (i = 0; i < nfds; i++) {
    630                if (client->fd == -1) {
    631                    client->fd = fds[i];
    632                } else {
    633                    close(fds[i]);
    634                    too_many = true;
    635                }
    636            }
    637            g_free(fds);
    638            if (too_many) {
    639                ret = -1;
    640                goto err;
    641            }
    642        }
    643
    644        buf += n_read;
    645        sz -= n_read;
    646    }
    647
    648    return 0;
    649
    650err:
    651    if (client->fd != -1) {
    652        close(client->fd);
    653        client->fd = -1;
    654    }
    655    return ret;
    656}
    657
    658static int coroutine_fn prh_read_request(PRHelperClient *client,
    659                                         PRHelperRequest *req,
    660                                         PRHelperResponse *resp, Error **errp)
    661{
    662    uint32_t sz;
    663
    664    if (prh_read(client, req->cdb, sizeof(req->cdb), NULL) < 0) {
    665        return -1;
    666    }
    667
    668    if (client->fd == -1) {
    669        error_setg(errp, "No file descriptor in request.");
    670        return -1;
    671    }
    672
    673    if (req->cdb[0] != PERSISTENT_RESERVE_OUT &&
    674        req->cdb[0] != PERSISTENT_RESERVE_IN) {
    675        error_setg(errp, "Invalid CDB, closing socket.");
    676        goto out_close;
    677    }
    678
    679    sz = scsi_cdb_xfer(req->cdb);
    680    if (sz > sizeof(client->data)) {
    681        goto out_close;
    682    }
    683
    684    if (req->cdb[0] == PERSISTENT_RESERVE_OUT) {
    685        if (qio_channel_read_all(QIO_CHANNEL(client->ioc),
    686                                 (char *)client->data, sz,
    687                                 errp) < 0) {
    688            goto out_close;
    689        }
    690    }
    691
    692    req->fd = client->fd;
    693    req->sz = sz;
    694    client->fd = -1;
    695    return sz;
    696
    697out_close:
    698    close(client->fd);
    699    client->fd = -1;
    700    return -1;
    701}
    702
    703static int coroutine_fn prh_write_response(PRHelperClient *client,
    704                                           PRHelperRequest *req,
    705                                           PRHelperResponse *resp, Error **errp)
    706{
    707    ssize_t r;
    708    size_t sz;
    709
    710    if (req->cdb[0] == PERSISTENT_RESERVE_IN && resp->result == GOOD) {
    711        assert(resp->sz <= req->sz && resp->sz <= sizeof(client->data));
    712    } else {
    713        assert(resp->sz == 0);
    714    }
    715
    716    sz = resp->sz;
    717
    718    resp->result = cpu_to_be32(resp->result);
    719    resp->sz = cpu_to_be32(resp->sz);
    720    r = qio_channel_write_all(QIO_CHANNEL(client->ioc),
    721                              (char *) resp, sizeof(*resp), errp);
    722    if (r < 0) {
    723        return r;
    724    }
    725
    726    r = qio_channel_write_all(QIO_CHANNEL(client->ioc),
    727                              (char *) client->data,
    728                              sz, errp);
    729    return r < 0 ? r : 0;
    730}
    731
    732static void coroutine_fn prh_co_entry(void *opaque)
    733{
    734    PRHelperClient *client = opaque;
    735    Error *local_err = NULL;
    736    uint32_t flags;
    737    int r;
    738
    739    qio_channel_set_blocking(QIO_CHANNEL(client->ioc),
    740                             false, NULL);
    741    qio_channel_attach_aio_context(QIO_CHANNEL(client->ioc),
    742                                   qemu_get_aio_context());
    743
    744    /* A very simple negotiation for future extensibility.  No features
    745     * are defined so write 0.
    746     */
    747    flags = cpu_to_be32(0);
    748    r = qio_channel_write_all(QIO_CHANNEL(client->ioc),
    749                             (char *) &flags, sizeof(flags), NULL);
    750    if (r < 0) {
    751        goto out;
    752    }
    753
    754    r = qio_channel_read_all(QIO_CHANNEL(client->ioc),
    755                             (char *) &flags, sizeof(flags), NULL);
    756    if (be32_to_cpu(flags) != 0 || r < 0) {
    757        goto out;
    758    }
    759
    760    while (qatomic_read(&state) == RUNNING) {
    761        PRHelperRequest req;
    762        PRHelperResponse resp;
    763        int sz;
    764
    765        sz = prh_read_request(client, &req, &resp, &local_err);
    766        if (sz < 0) {
    767            break;
    768        }
    769
    770        num_active_sockets++;
    771        if (req.cdb[0] == PERSISTENT_RESERVE_OUT) {
    772            r = do_pr_out(req.fd, req.cdb, resp.sense,
    773                          client->data, sz);
    774            resp.sz = 0;
    775        } else {
    776            resp.sz = sizeof(client->data);
    777            r = do_pr_in(req.fd, req.cdb, resp.sense,
    778                         client->data, &resp.sz);
    779            resp.sz = MIN(resp.sz, sz);
    780        }
    781        num_active_sockets--;
    782        close(req.fd);
    783        if (r == -1) {
    784            break;
    785        }
    786        resp.result = r;
    787
    788        if (prh_write_response(client, &req, &resp, &local_err) < 0) {
    789            break;
    790        }
    791    }
    792
    793    if (local_err) {
    794        if (verbose == 0) {
    795            error_free(local_err);
    796        } else {
    797            error_report_err(local_err);
    798        }
    799    }
    800
    801out:
    802    qio_channel_detach_aio_context(QIO_CHANNEL(client->ioc));
    803    object_unref(OBJECT(client->ioc));
    804    g_free(client);
    805}
    806
    807static gboolean accept_client(QIOChannel *ioc, GIOCondition cond, gpointer opaque)
    808{
    809    QIOChannelSocket *cioc;
    810    PRHelperClient *prh;
    811
    812    cioc = qio_channel_socket_accept(QIO_CHANNEL_SOCKET(ioc),
    813                                     NULL);
    814    if (!cioc) {
    815        return TRUE;
    816    }
    817
    818    prh = g_new(PRHelperClient, 1);
    819    prh->ioc = cioc;
    820    prh->fd = -1;
    821    prh->co = qemu_coroutine_create(prh_co_entry, prh);
    822    qemu_coroutine_enter(prh->co);
    823
    824    return TRUE;
    825}
    826
    827static void termsig_handler(int signum)
    828{
    829    qatomic_cmpxchg(&state, RUNNING, TERMINATE);
    830    qemu_notify_event();
    831}
    832
    833static void close_server_socket(void)
    834{
    835    assert(server_ioc);
    836
    837    g_source_remove(server_watch);
    838    server_watch = -1;
    839    object_unref(OBJECT(server_ioc));
    840    num_active_sockets--;
    841}
    842
    843#ifdef CONFIG_LIBCAP_NG
    844static int drop_privileges(void)
    845{
    846    /* clear all capabilities */
    847    capng_clear(CAPNG_SELECT_BOTH);
    848
    849    if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE | CAPNG_PERMITTED,
    850                     CAP_SYS_RAWIO) < 0) {
    851        return -1;
    852    }
    853
    854#ifdef CONFIG_MPATH
    855    /* For /dev/mapper/control ioctls */
    856    if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE | CAPNG_PERMITTED,
    857                     CAP_SYS_ADMIN) < 0) {
    858        return -1;
    859    }
    860#endif
    861
    862    /* Change user/group id, retaining the capabilities.  Because file descriptors
    863     * are passed via SCM_RIGHTS, we don't need supplementary groups (and in
    864     * fact the helper can run as "nobody").
    865     */
    866    if (capng_change_id(uid != -1 ? uid : getuid(),
    867                        gid != -1 ? gid : getgid(),
    868                        CAPNG_DROP_SUPP_GRP | CAPNG_CLEAR_BOUNDING)) {
    869        return -1;
    870    }
    871
    872    return 0;
    873}
    874#endif
    875
    876int main(int argc, char **argv)
    877{
    878    const char *sopt = "hVk:f:dT:u:g:vq";
    879    struct option lopt[] = {
    880        { "help", no_argument, NULL, 'h' },
    881        { "version", no_argument, NULL, 'V' },
    882        { "socket", required_argument, NULL, 'k' },
    883        { "pidfile", required_argument, NULL, 'f' },
    884        { "daemon", no_argument, NULL, 'd' },
    885        { "trace", required_argument, NULL, 'T' },
    886        { "user", required_argument, NULL, 'u' },
    887        { "group", required_argument, NULL, 'g' },
    888        { "verbose", no_argument, NULL, 'v' },
    889        { "quiet", no_argument, NULL, 'q' },
    890        { NULL, 0, NULL, 0 }
    891    };
    892    int opt_ind = 0;
    893    int loglevel = 1;
    894    int quiet = 0;
    895    int ch;
    896    Error *local_err = NULL;
    897    bool daemonize = false;
    898    bool pidfile_specified = false;
    899    bool socket_path_specified = false;
    900    unsigned socket_activation;
    901
    902    struct sigaction sa_sigterm;
    903    memset(&sa_sigterm, 0, sizeof(sa_sigterm));
    904    sa_sigterm.sa_handler = termsig_handler;
    905    sigaction(SIGTERM, &sa_sigterm, NULL);
    906    sigaction(SIGINT, &sa_sigterm, NULL);
    907    sigaction(SIGHUP, &sa_sigterm, NULL);
    908
    909    signal(SIGPIPE, SIG_IGN);
    910
    911    error_init(argv[0]);
    912    module_call_init(MODULE_INIT_TRACE);
    913    module_call_init(MODULE_INIT_QOM);
    914    qemu_add_opts(&qemu_trace_opts);
    915    qemu_init_exec_dir(argv[0]);
    916
    917    compute_default_paths();
    918
    919    while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) {
    920        switch (ch) {
    921        case 'k':
    922            g_free(socket_path);
    923            socket_path = g_strdup(optarg);
    924            socket_path_specified = true;
    925            if (socket_path[0] != '/') {
    926                error_report("socket path must be absolute");
    927                exit(EXIT_FAILURE);
    928            }
    929            break;
    930        case 'f':
    931            g_free(pidfile);
    932            pidfile = g_strdup(optarg);
    933            pidfile_specified = true;
    934            break;
    935#ifdef CONFIG_LIBCAP_NG
    936        case 'u': {
    937            unsigned long res;
    938            struct passwd *userinfo = getpwnam(optarg);
    939            if (userinfo) {
    940                uid = userinfo->pw_uid;
    941            } else if (qemu_strtoul(optarg, NULL, 10, &res) == 0 &&
    942                       (uid_t)res == res) {
    943                uid = res;
    944            } else {
    945                error_report("invalid user '%s'", optarg);
    946                exit(EXIT_FAILURE);
    947            }
    948            break;
    949        }
    950        case 'g': {
    951            unsigned long res;
    952            struct group *groupinfo = getgrnam(optarg);
    953            if (groupinfo) {
    954                gid = groupinfo->gr_gid;
    955            } else if (qemu_strtoul(optarg, NULL, 10, &res) == 0 &&
    956                       (gid_t)res == res) {
    957                gid = res;
    958            } else {
    959                error_report("invalid group '%s'", optarg);
    960                exit(EXIT_FAILURE);
    961            }
    962            break;
    963        }
    964#else
    965        case 'u':
    966        case 'g':
    967            error_report("-%c not supported by this %s", ch, argv[0]);
    968            exit(1);
    969#endif
    970        case 'd':
    971            daemonize = true;
    972            break;
    973        case 'q':
    974            quiet = 1;
    975            break;
    976        case 'v':
    977            ++loglevel;
    978            break;
    979        case 'T':
    980            trace_opt_parse(optarg);
    981            break;
    982        case 'V':
    983            version(argv[0]);
    984            exit(EXIT_SUCCESS);
    985            break;
    986        case 'h':
    987            usage(argv[0]);
    988            exit(EXIT_SUCCESS);
    989            break;
    990        case '?':
    991            error_report("Try `%s --help' for more information.", argv[0]);
    992            exit(EXIT_FAILURE);
    993        }
    994    }
    995
    996    /* set verbosity */
    997    noisy = !quiet && (loglevel >= 3);
    998    verbose = quiet ? 0 : MIN(loglevel, 3);
    999
   1000    if (!trace_init_backends()) {
   1001        exit(EXIT_FAILURE);
   1002    }
   1003    trace_init_file();
   1004    qemu_set_log(LOG_TRACE);
   1005
   1006#ifdef CONFIG_MPATH
   1007    dm_init();
   1008    multipath_pr_init();
   1009#endif
   1010
   1011    socket_activation = check_socket_activation();
   1012    if (socket_activation == 0) {
   1013        SocketAddress saddr;
   1014        saddr = (SocketAddress){
   1015            .type = SOCKET_ADDRESS_TYPE_UNIX,
   1016            .u.q_unix.path = socket_path,
   1017        };
   1018        server_ioc = qio_channel_socket_new();
   1019        if (qio_channel_socket_listen_sync(server_ioc, &saddr,
   1020                                           1, &local_err) < 0) {
   1021            object_unref(OBJECT(server_ioc));
   1022            error_report_err(local_err);
   1023            return 1;
   1024        }
   1025    } else {
   1026        /* Using socket activation - check user didn't use -p etc. */
   1027        if (socket_path_specified) {
   1028            error_report("Unix socket can't be set when using socket activation");
   1029            exit(EXIT_FAILURE);
   1030        }
   1031
   1032        /* Can only listen on a single socket.  */
   1033        if (socket_activation > 1) {
   1034            error_report("%s does not support socket activation with LISTEN_FDS > 1",
   1035                         argv[0]);
   1036            exit(EXIT_FAILURE);
   1037        }
   1038        server_ioc = qio_channel_socket_new_fd(FIRST_SOCKET_ACTIVATION_FD,
   1039                                               &local_err);
   1040        if (server_ioc == NULL) {
   1041            error_reportf_err(local_err,
   1042                              "Failed to use socket activation: ");
   1043            exit(EXIT_FAILURE);
   1044        }
   1045    }
   1046
   1047    qemu_init_main_loop(&error_fatal);
   1048
   1049    server_watch = qio_channel_add_watch(QIO_CHANNEL(server_ioc),
   1050                                         G_IO_IN,
   1051                                         accept_client,
   1052                                         NULL, NULL);
   1053
   1054    if (daemonize) {
   1055        if (daemon(0, 0) < 0) {
   1056            error_report("Failed to daemonize: %s", strerror(errno));
   1057            exit(EXIT_FAILURE);
   1058        }
   1059    }
   1060
   1061    if (daemonize || pidfile_specified) {
   1062        qemu_write_pidfile(pidfile, &error_fatal);
   1063    }
   1064
   1065#ifdef CONFIG_LIBCAP_NG
   1066    if (drop_privileges() < 0) {
   1067        error_report("Failed to drop privileges: %s", strerror(errno));
   1068        exit(EXIT_FAILURE);
   1069    }
   1070#endif
   1071
   1072    state = RUNNING;
   1073    do {
   1074        main_loop_wait(false);
   1075        if (state == TERMINATE) {
   1076            state = TERMINATING;
   1077            close_server_socket();
   1078        }
   1079    } while (num_active_sockets > 0);
   1080
   1081    exit(EXIT_SUCCESS);
   1082}