cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

main.c (40557B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 *  linux/init/main.c
      4 *
      5 *  Copyright (C) 1991, 1992  Linus Torvalds
      6 *
      7 *  GK 2/5/95  -  Changed to support mounting root fs via NFS
      8 *  Added initrd & change_root: Werner Almesberger & Hans Lermen, Feb '96
      9 *  Moan early if gcc is old, avoiding bogus kernels - Paul Gortmaker, May '96
     10 *  Simplified starting of init:  Michael A. Griffith <grif@acm.org>
     11 */
     12
     13#define DEBUG		/* Enable initcall_debug */
     14
     15#include <linux/types.h>
     16#include <linux/extable.h>
     17#include <linux/module.h>
     18#include <linux/proc_fs.h>
     19#include <linux/binfmts.h>
     20#include <linux/kernel.h>
     21#include <linux/syscalls.h>
     22#include <linux/stackprotector.h>
     23#include <linux/string.h>
     24#include <linux/ctype.h>
     25#include <linux/delay.h>
     26#include <linux/ioport.h>
     27#include <linux/init.h>
     28#include <linux/initrd.h>
     29#include <linux/memblock.h>
     30#include <linux/acpi.h>
     31#include <linux/bootconfig.h>
     32#include <linux/console.h>
     33#include <linux/nmi.h>
     34#include <linux/percpu.h>
     35#include <linux/kmod.h>
     36#include <linux/kprobes.h>
     37#include <linux/vmalloc.h>
     38#include <linux/kernel_stat.h>
     39#include <linux/start_kernel.h>
     40#include <linux/security.h>
     41#include <linux/smp.h>
     42#include <linux/profile.h>
     43#include <linux/kfence.h>
     44#include <linux/rcupdate.h>
     45#include <linux/srcu.h>
     46#include <linux/moduleparam.h>
     47#include <linux/kallsyms.h>
     48#include <linux/buildid.h>
     49#include <linux/writeback.h>
     50#include <linux/cpu.h>
     51#include <linux/cpuset.h>
     52#include <linux/cgroup.h>
     53#include <linux/efi.h>
     54#include <linux/tick.h>
     55#include <linux/sched/isolation.h>
     56#include <linux/interrupt.h>
     57#include <linux/taskstats_kern.h>
     58#include <linux/delayacct.h>
     59#include <linux/unistd.h>
     60#include <linux/utsname.h>
     61#include <linux/rmap.h>
     62#include <linux/mempolicy.h>
     63#include <linux/key.h>
     64#include <linux/page_ext.h>
     65#include <linux/debug_locks.h>
     66#include <linux/debugobjects.h>
     67#include <linux/lockdep.h>
     68#include <linux/kmemleak.h>
     69#include <linux/padata.h>
     70#include <linux/pid_namespace.h>
     71#include <linux/device/driver.h>
     72#include <linux/kthread.h>
     73#include <linux/sched.h>
     74#include <linux/sched/init.h>
     75#include <linux/signal.h>
     76#include <linux/idr.h>
     77#include <linux/kgdb.h>
     78#include <linux/ftrace.h>
     79#include <linux/async.h>
     80#include <linux/shmem_fs.h>
     81#include <linux/slab.h>
     82#include <linux/perf_event.h>
     83#include <linux/ptrace.h>
     84#include <linux/pti.h>
     85#include <linux/blkdev.h>
     86#include <linux/sched/clock.h>
     87#include <linux/sched/task.h>
     88#include <linux/sched/task_stack.h>
     89#include <linux/context_tracking.h>
     90#include <linux/random.h>
     91#include <linux/list.h>
     92#include <linux/integrity.h>
     93#include <linux/proc_ns.h>
     94#include <linux/io.h>
     95#include <linux/cache.h>
     96#include <linux/rodata_test.h>
     97#include <linux/jump_label.h>
     98#include <linux/mem_encrypt.h>
     99#include <linux/kcsan.h>
    100#include <linux/init_syscalls.h>
    101#include <linux/stackdepot.h>
    102#include <net/net_namespace.h>
    103
    104#include <asm/io.h>
    105#include <asm/bugs.h>
    106#include <asm/setup.h>
    107#include <asm/sections.h>
    108#include <asm/cacheflush.h>
    109
    110#define CREATE_TRACE_POINTS
    111#include <trace/events/initcall.h>
    112
    113#include <kunit/test.h>
    114
    115static int kernel_init(void *);
    116
    117extern void init_IRQ(void);
    118extern void radix_tree_init(void);
    119
    120/*
    121 * Debug helper: via this flag we know that we are in 'early bootup code'
    122 * where only the boot processor is running with IRQ disabled.  This means
    123 * two things - IRQ must not be enabled before the flag is cleared and some
    124 * operations which are not allowed with IRQ disabled are allowed while the
    125 * flag is set.
    126 */
    127bool early_boot_irqs_disabled __read_mostly;
    128
    129enum system_states system_state __read_mostly;
    130EXPORT_SYMBOL(system_state);
    131
    132/*
    133 * Boot command-line arguments
    134 */
    135#define MAX_INIT_ARGS CONFIG_INIT_ENV_ARG_LIMIT
    136#define MAX_INIT_ENVS CONFIG_INIT_ENV_ARG_LIMIT
    137
    138extern void time_init(void);
    139/* Default late time init is NULL. archs can override this later. */
    140void (*__initdata late_time_init)(void);
    141
    142/* Untouched command line saved by arch-specific code. */
    143char __initdata boot_command_line[COMMAND_LINE_SIZE];
    144/* Untouched saved command line (eg. for /proc) */
    145char *saved_command_line;
    146/* Command line for parameter parsing */
    147static char *static_command_line;
    148/* Untouched extra command line */
    149static char *extra_command_line;
    150/* Extra init arguments */
    151static char *extra_init_args;
    152
    153#ifdef CONFIG_BOOT_CONFIG
    154/* Is bootconfig on command line? */
    155static bool bootconfig_found;
    156static size_t initargs_offs;
    157#else
    158# define bootconfig_found false
    159# define initargs_offs 0
    160#endif
    161
    162static char *execute_command;
    163static char *ramdisk_execute_command = "/init";
    164
    165/*
    166 * Used to generate warnings if static_key manipulation functions are used
    167 * before jump_label_init is called.
    168 */
    169bool static_key_initialized __read_mostly;
    170EXPORT_SYMBOL_GPL(static_key_initialized);
    171
    172/*
    173 * If set, this tells drivers to reset the underlying device before going
    174 * ahead with initialization; otherwise a driver might rely on the BIOS
    175 * and skip the reset operation.
    176 *
    177 * This is useful if kernel is booting in an unreliable environment.
    178 * For ex. kdump situation where previous kernel has crashed, BIOS has been
    179 * skipped and devices will be in unknown state.
    180 */
    181unsigned int reset_devices;
    182EXPORT_SYMBOL(reset_devices);
    183
    184static int __init set_reset_devices(char *str)
    185{
    186	reset_devices = 1;
    187	return 1;
    188}
    189
    190__setup("reset_devices", set_reset_devices);
    191
    192static const char *argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
    193const char *envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
    194static const char *panic_later, *panic_param;
    195
    196extern const struct obs_kernel_param __setup_start[], __setup_end[];
    197
    198static bool __init obsolete_checksetup(char *line)
    199{
    200	const struct obs_kernel_param *p;
    201	bool had_early_param = false;
    202
    203	p = __setup_start;
    204	do {
    205		int n = strlen(p->str);
    206		if (parameqn(line, p->str, n)) {
    207			if (p->early) {
    208				/* Already done in parse_early_param?
    209				 * (Needs exact match on param part).
    210				 * Keep iterating, as we can have early
    211				 * params and __setups of same names 8( */
    212				if (line[n] == '\0' || line[n] == '=')
    213					had_early_param = true;
    214			} else if (!p->setup_func) {
    215				pr_warn("Parameter %s is obsolete, ignored\n",
    216					p->str);
    217				return true;
    218			} else if (p->setup_func(line + n))
    219				return true;
    220		}
    221		p++;
    222	} while (p < __setup_end);
    223
    224	return had_early_param;
    225}
    226
    227/*
    228 * This should be approx 2 Bo*oMips to start (note initial shift), and will
    229 * still work even if initially too large, it will just take slightly longer
    230 */
    231unsigned long loops_per_jiffy = (1<<12);
    232EXPORT_SYMBOL(loops_per_jiffy);
    233
    234static int __init debug_kernel(char *str)
    235{
    236	console_loglevel = CONSOLE_LOGLEVEL_DEBUG;
    237	return 0;
    238}
    239
    240static int __init quiet_kernel(char *str)
    241{
    242	console_loglevel = CONSOLE_LOGLEVEL_QUIET;
    243	return 0;
    244}
    245
    246early_param("debug", debug_kernel);
    247early_param("quiet", quiet_kernel);
    248
    249static int __init loglevel(char *str)
    250{
    251	int newlevel;
    252
    253	/*
    254	 * Only update loglevel value when a correct setting was passed,
    255	 * to prevent blind crashes (when loglevel being set to 0) that
    256	 * are quite hard to debug
    257	 */
    258	if (get_option(&str, &newlevel)) {
    259		console_loglevel = newlevel;
    260		return 0;
    261	}
    262
    263	return -EINVAL;
    264}
    265
    266early_param("loglevel", loglevel);
    267
    268#ifdef CONFIG_BLK_DEV_INITRD
    269static void * __init get_boot_config_from_initrd(size_t *_size)
    270{
    271	u32 size, csum;
    272	char *data;
    273	u32 *hdr;
    274	int i;
    275
    276	if (!initrd_end)
    277		return NULL;
    278
    279	data = (char *)initrd_end - BOOTCONFIG_MAGIC_LEN;
    280	/*
    281	 * Since Grub may align the size of initrd to 4, we must
    282	 * check the preceding 3 bytes as well.
    283	 */
    284	for (i = 0; i < 4; i++) {
    285		if (!memcmp(data, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN))
    286			goto found;
    287		data--;
    288	}
    289	return NULL;
    290
    291found:
    292	hdr = (u32 *)(data - 8);
    293	size = le32_to_cpu(hdr[0]);
    294	csum = le32_to_cpu(hdr[1]);
    295
    296	data = ((void *)hdr) - size;
    297	if ((unsigned long)data < initrd_start) {
    298		pr_err("bootconfig size %d is greater than initrd size %ld\n",
    299			size, initrd_end - initrd_start);
    300		return NULL;
    301	}
    302
    303	if (xbc_calc_checksum(data, size) != csum) {
    304		pr_err("bootconfig checksum failed\n");
    305		return NULL;
    306	}
    307
    308	/* Remove bootconfig from initramfs/initrd */
    309	initrd_end = (unsigned long)data;
    310	if (_size)
    311		*_size = size;
    312
    313	return data;
    314}
    315#else
    316static void * __init get_boot_config_from_initrd(size_t *_size)
    317{
    318	return NULL;
    319}
    320#endif
    321
    322#ifdef CONFIG_BOOT_CONFIG
    323
    324static char xbc_namebuf[XBC_KEYLEN_MAX] __initdata;
    325
    326#define rest(dst, end) ((end) > (dst) ? (end) - (dst) : 0)
    327
    328static int __init xbc_snprint_cmdline(char *buf, size_t size,
    329				      struct xbc_node *root)
    330{
    331	struct xbc_node *knode, *vnode;
    332	char *end = buf + size;
    333	const char *val;
    334	int ret;
    335
    336	xbc_node_for_each_key_value(root, knode, val) {
    337		ret = xbc_node_compose_key_after(root, knode,
    338					xbc_namebuf, XBC_KEYLEN_MAX);
    339		if (ret < 0)
    340			return ret;
    341
    342		vnode = xbc_node_get_child(knode);
    343		if (!vnode) {
    344			ret = snprintf(buf, rest(buf, end), "%s ", xbc_namebuf);
    345			if (ret < 0)
    346				return ret;
    347			buf += ret;
    348			continue;
    349		}
    350		xbc_array_for_each_value(vnode, val) {
    351			ret = snprintf(buf, rest(buf, end), "%s=\"%s\" ",
    352				       xbc_namebuf, val);
    353			if (ret < 0)
    354				return ret;
    355			buf += ret;
    356		}
    357	}
    358
    359	return buf - (end - size);
    360}
    361#undef rest
    362
    363/* Make an extra command line under given key word */
    364static char * __init xbc_make_cmdline(const char *key)
    365{
    366	struct xbc_node *root;
    367	char *new_cmdline;
    368	int ret, len = 0;
    369
    370	root = xbc_find_node(key);
    371	if (!root)
    372		return NULL;
    373
    374	/* Count required buffer size */
    375	len = xbc_snprint_cmdline(NULL, 0, root);
    376	if (len <= 0)
    377		return NULL;
    378
    379	new_cmdline = memblock_alloc(len + 1, SMP_CACHE_BYTES);
    380	if (!new_cmdline) {
    381		pr_err("Failed to allocate memory for extra kernel cmdline.\n");
    382		return NULL;
    383	}
    384
    385	ret = xbc_snprint_cmdline(new_cmdline, len + 1, root);
    386	if (ret < 0 || ret > len) {
    387		pr_err("Failed to print extra kernel cmdline.\n");
    388		memblock_free(new_cmdline, len + 1);
    389		return NULL;
    390	}
    391
    392	return new_cmdline;
    393}
    394
    395static int __init bootconfig_params(char *param, char *val,
    396				    const char *unused, void *arg)
    397{
    398	if (strcmp(param, "bootconfig") == 0) {
    399		bootconfig_found = true;
    400	}
    401	return 0;
    402}
    403
    404static int __init warn_bootconfig(char *str)
    405{
    406	/* The 'bootconfig' has been handled by bootconfig_params(). */
    407	return 0;
    408}
    409
    410static void __init setup_boot_config(void)
    411{
    412	static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata;
    413	const char *msg, *data;
    414	int pos, ret;
    415	size_t size;
    416	char *err;
    417
    418	/* Cut out the bootconfig data even if we have no bootconfig option */
    419	data = get_boot_config_from_initrd(&size);
    420	/* If there is no bootconfig in initrd, try embedded one. */
    421	if (!data)
    422		data = xbc_get_embedded_bootconfig(&size);
    423
    424	strlcpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE);
    425	err = parse_args("bootconfig", tmp_cmdline, NULL, 0, 0, 0, NULL,
    426			 bootconfig_params);
    427
    428	if (IS_ERR(err) || !bootconfig_found)
    429		return;
    430
    431	/* parse_args() stops at the next param of '--' and returns an address */
    432	if (err)
    433		initargs_offs = err - tmp_cmdline;
    434
    435	if (!data) {
    436		pr_err("'bootconfig' found on command line, but no bootconfig found\n");
    437		return;
    438	}
    439
    440	if (size >= XBC_DATA_MAX) {
    441		pr_err("bootconfig size %ld greater than max size %d\n",
    442			(long)size, XBC_DATA_MAX);
    443		return;
    444	}
    445
    446	ret = xbc_init(data, size, &msg, &pos);
    447	if (ret < 0) {
    448		if (pos < 0)
    449			pr_err("Failed to init bootconfig: %s.\n", msg);
    450		else
    451			pr_err("Failed to parse bootconfig: %s at %d.\n",
    452				msg, pos);
    453	} else {
    454		xbc_get_info(&ret, NULL);
    455		pr_info("Load bootconfig: %ld bytes %d nodes\n", (long)size, ret);
    456		/* keys starting with "kernel." are passed via cmdline */
    457		extra_command_line = xbc_make_cmdline("kernel");
    458		/* Also, "init." keys are init arguments */
    459		extra_init_args = xbc_make_cmdline("init");
    460	}
    461	return;
    462}
    463
    464static void __init exit_boot_config(void)
    465{
    466	xbc_exit();
    467}
    468
    469#else	/* !CONFIG_BOOT_CONFIG */
    470
    471static void __init setup_boot_config(void)
    472{
    473	/* Remove bootconfig data from initrd */
    474	get_boot_config_from_initrd(NULL);
    475}
    476
    477static int __init warn_bootconfig(char *str)
    478{
    479	pr_warn("WARNING: 'bootconfig' found on the kernel command line but CONFIG_BOOT_CONFIG is not set.\n");
    480	return 0;
    481}
    482
    483#define exit_boot_config()	do {} while (0)
    484
    485#endif	/* CONFIG_BOOT_CONFIG */
    486
    487early_param("bootconfig", warn_bootconfig);
    488
    489/* Change NUL term back to "=", to make "param" the whole string. */
    490static void __init repair_env_string(char *param, char *val)
    491{
    492	if (val) {
    493		/* param=val or param="val"? */
    494		if (val == param+strlen(param)+1)
    495			val[-1] = '=';
    496		else if (val == param+strlen(param)+2) {
    497			val[-2] = '=';
    498			memmove(val-1, val, strlen(val)+1);
    499		} else
    500			BUG();
    501	}
    502}
    503
    504/* Anything after -- gets handed straight to init. */
    505static int __init set_init_arg(char *param, char *val,
    506			       const char *unused, void *arg)
    507{
    508	unsigned int i;
    509
    510	if (panic_later)
    511		return 0;
    512
    513	repair_env_string(param, val);
    514
    515	for (i = 0; argv_init[i]; i++) {
    516		if (i == MAX_INIT_ARGS) {
    517			panic_later = "init";
    518			panic_param = param;
    519			return 0;
    520		}
    521	}
    522	argv_init[i] = param;
    523	return 0;
    524}
    525
    526/*
    527 * Unknown boot options get handed to init, unless they look like
    528 * unused parameters (modprobe will find them in /proc/cmdline).
    529 */
    530static int __init unknown_bootoption(char *param, char *val,
    531				     const char *unused, void *arg)
    532{
    533	size_t len = strlen(param);
    534
    535	repair_env_string(param, val);
    536
    537	/* Handle obsolete-style parameters */
    538	if (obsolete_checksetup(param))
    539		return 0;
    540
    541	/* Unused module parameter. */
    542	if (strnchr(param, len, '.'))
    543		return 0;
    544
    545	if (panic_later)
    546		return 0;
    547
    548	if (val) {
    549		/* Environment option */
    550		unsigned int i;
    551		for (i = 0; envp_init[i]; i++) {
    552			if (i == MAX_INIT_ENVS) {
    553				panic_later = "env";
    554				panic_param = param;
    555			}
    556			if (!strncmp(param, envp_init[i], len+1))
    557				break;
    558		}
    559		envp_init[i] = param;
    560	} else {
    561		/* Command line option */
    562		unsigned int i;
    563		for (i = 0; argv_init[i]; i++) {
    564			if (i == MAX_INIT_ARGS) {
    565				panic_later = "init";
    566				panic_param = param;
    567			}
    568		}
    569		argv_init[i] = param;
    570	}
    571	return 0;
    572}
    573
    574static int __init init_setup(char *str)
    575{
    576	unsigned int i;
    577
    578	execute_command = str;
    579	/*
    580	 * In case LILO is going to boot us with default command line,
    581	 * it prepends "auto" before the whole cmdline which makes
    582	 * the shell think it should execute a script with such name.
    583	 * So we ignore all arguments entered _before_ init=... [MJ]
    584	 */
    585	for (i = 1; i < MAX_INIT_ARGS; i++)
    586		argv_init[i] = NULL;
    587	return 1;
    588}
    589__setup("init=", init_setup);
    590
    591static int __init rdinit_setup(char *str)
    592{
    593	unsigned int i;
    594
    595	ramdisk_execute_command = str;
    596	/* See "auto" comment in init_setup */
    597	for (i = 1; i < MAX_INIT_ARGS; i++)
    598		argv_init[i] = NULL;
    599	return 1;
    600}
    601__setup("rdinit=", rdinit_setup);
    602
    603#ifndef CONFIG_SMP
    604static const unsigned int setup_max_cpus = NR_CPUS;
    605static inline void setup_nr_cpu_ids(void) { }
    606static inline void smp_prepare_cpus(unsigned int maxcpus) { }
    607#endif
    608
    609/*
    610 * We need to store the untouched command line for future reference.
    611 * We also need to store the touched command line since the parameter
    612 * parsing is performed in place, and we should allow a component to
    613 * store reference of name/value for future reference.
    614 */
    615static void __init setup_command_line(char *command_line)
    616{
    617	size_t len, xlen = 0, ilen = 0;
    618
    619	if (extra_command_line)
    620		xlen = strlen(extra_command_line);
    621	if (extra_init_args)
    622		ilen = strlen(extra_init_args) + 4; /* for " -- " */
    623
    624	len = xlen + strlen(boot_command_line) + 1;
    625
    626	saved_command_line = memblock_alloc(len + ilen, SMP_CACHE_BYTES);
    627	if (!saved_command_line)
    628		panic("%s: Failed to allocate %zu bytes\n", __func__, len + ilen);
    629
    630	static_command_line = memblock_alloc(len, SMP_CACHE_BYTES);
    631	if (!static_command_line)
    632		panic("%s: Failed to allocate %zu bytes\n", __func__, len);
    633
    634	if (xlen) {
    635		/*
    636		 * We have to put extra_command_line before boot command
    637		 * lines because there could be dashes (separator of init
    638		 * command line) in the command lines.
    639		 */
    640		strcpy(saved_command_line, extra_command_line);
    641		strcpy(static_command_line, extra_command_line);
    642	}
    643	strcpy(saved_command_line + xlen, boot_command_line);
    644	strcpy(static_command_line + xlen, command_line);
    645
    646	if (ilen) {
    647		/*
    648		 * Append supplemental init boot args to saved_command_line
    649		 * so that user can check what command line options passed
    650		 * to init.
    651		 * The order should always be
    652		 * " -- "[bootconfig init-param][cmdline init-param]
    653		 */
    654		if (initargs_offs) {
    655			len = xlen + initargs_offs;
    656			strcpy(saved_command_line + len, extra_init_args);
    657			len += ilen - 4;	/* strlen(extra_init_args) */
    658			strcpy(saved_command_line + len,
    659				boot_command_line + initargs_offs - 1);
    660		} else {
    661			len = strlen(saved_command_line);
    662			strcpy(saved_command_line + len, " -- ");
    663			len += 4;
    664			strcpy(saved_command_line + len, extra_init_args);
    665		}
    666	}
    667}
    668
    669/*
    670 * We need to finalize in a non-__init function or else race conditions
    671 * between the root thread and the init thread may cause start_kernel to
    672 * be reaped by free_initmem before the root thread has proceeded to
    673 * cpu_idle.
    674 *
    675 * gcc-3.4 accidentally inlines this function, so use noinline.
    676 */
    677
    678static __initdata DECLARE_COMPLETION(kthreadd_done);
    679
    680noinline void __ref rest_init(void)
    681{
    682	struct task_struct *tsk;
    683	int pid;
    684
    685	rcu_scheduler_starting();
    686	/*
    687	 * We need to spawn init first so that it obtains pid 1, however
    688	 * the init task will end up wanting to create kthreads, which, if
    689	 * we schedule it before we create kthreadd, will OOPS.
    690	 */
    691	pid = user_mode_thread(kernel_init, NULL, CLONE_FS);
    692	/*
    693	 * Pin init on the boot CPU. Task migration is not properly working
    694	 * until sched_init_smp() has been run. It will set the allowed
    695	 * CPUs for init to the non isolated CPUs.
    696	 */
    697	rcu_read_lock();
    698	tsk = find_task_by_pid_ns(pid, &init_pid_ns);
    699	tsk->flags |= PF_NO_SETAFFINITY;
    700	set_cpus_allowed_ptr(tsk, cpumask_of(smp_processor_id()));
    701	rcu_read_unlock();
    702
    703	numa_default_policy();
    704	pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
    705	rcu_read_lock();
    706	kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
    707	rcu_read_unlock();
    708
    709	/*
    710	 * Enable might_sleep() and smp_processor_id() checks.
    711	 * They cannot be enabled earlier because with CONFIG_PREEMPTION=y
    712	 * kernel_thread() would trigger might_sleep() splats. With
    713	 * CONFIG_PREEMPT_VOLUNTARY=y the init task might have scheduled
    714	 * already, but it's stuck on the kthreadd_done completion.
    715	 */
    716	system_state = SYSTEM_SCHEDULING;
    717
    718	complete(&kthreadd_done);
    719
    720	/*
    721	 * The boot idle thread must execute schedule()
    722	 * at least once to get things moving:
    723	 */
    724	schedule_preempt_disabled();
    725	/* Call into cpu_idle with preempt disabled */
    726	cpu_startup_entry(CPUHP_ONLINE);
    727}
    728
    729/* Check for early params. */
    730static int __init do_early_param(char *param, char *val,
    731				 const char *unused, void *arg)
    732{
    733	const struct obs_kernel_param *p;
    734
    735	for (p = __setup_start; p < __setup_end; p++) {
    736		if ((p->early && parameq(param, p->str)) ||
    737		    (strcmp(param, "console") == 0 &&
    738		     strcmp(p->str, "earlycon") == 0)
    739		) {
    740			if (p->setup_func(val) != 0)
    741				pr_warn("Malformed early option '%s'\n", param);
    742		}
    743	}
    744	/* We accept everything at this stage. */
    745	return 0;
    746}
    747
    748void __init parse_early_options(char *cmdline)
    749{
    750	parse_args("early options", cmdline, NULL, 0, 0, 0, NULL,
    751		   do_early_param);
    752}
    753
    754/* Arch code calls this early on, or if not, just before other parsing. */
    755void __init parse_early_param(void)
    756{
    757	static int done __initdata;
    758	static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata;
    759
    760	if (done)
    761		return;
    762
    763	/* All fall through to do_early_param. */
    764	strlcpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE);
    765	parse_early_options(tmp_cmdline);
    766	done = 1;
    767}
    768
    769void __init __weak arch_post_acpi_subsys_init(void) { }
    770
    771void __init __weak smp_setup_processor_id(void)
    772{
    773}
    774
    775# if THREAD_SIZE >= PAGE_SIZE
    776void __init __weak thread_stack_cache_init(void)
    777{
    778}
    779#endif
    780
    781void __init __weak mem_encrypt_init(void) { }
    782
    783void __init __weak poking_init(void) { }
    784
    785void __init __weak pgtable_cache_init(void) { }
    786
    787void __init __weak trap_init(void) { }
    788
    789bool initcall_debug;
    790core_param(initcall_debug, initcall_debug, bool, 0644);
    791
    792#ifdef TRACEPOINTS_ENABLED
    793static void __init initcall_debug_enable(void);
    794#else
    795static inline void initcall_debug_enable(void)
    796{
    797}
    798#endif
    799
    800/* Report memory auto-initialization states for this boot. */
    801static void __init report_meminit(void)
    802{
    803	const char *stack;
    804
    805	if (IS_ENABLED(CONFIG_INIT_STACK_ALL_PATTERN))
    806		stack = "all(pattern)";
    807	else if (IS_ENABLED(CONFIG_INIT_STACK_ALL_ZERO))
    808		stack = "all(zero)";
    809	else if (IS_ENABLED(CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL))
    810		stack = "byref_all(zero)";
    811	else if (IS_ENABLED(CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF))
    812		stack = "byref(zero)";
    813	else if (IS_ENABLED(CONFIG_GCC_PLUGIN_STRUCTLEAK_USER))
    814		stack = "__user(zero)";
    815	else
    816		stack = "off";
    817
    818	pr_info("mem auto-init: stack:%s, heap alloc:%s, heap free:%s\n",
    819		stack, want_init_on_alloc(GFP_KERNEL) ? "on" : "off",
    820		want_init_on_free() ? "on" : "off");
    821	if (want_init_on_free())
    822		pr_info("mem auto-init: clearing system memory may take some time...\n");
    823}
    824
    825/*
    826 * Set up kernel memory allocators
    827 */
    828static void __init mm_init(void)
    829{
    830	/*
    831	 * page_ext requires contiguous pages,
    832	 * bigger than MAX_ORDER unless SPARSEMEM.
    833	 */
    834	page_ext_init_flatmem();
    835	init_mem_debugging_and_hardening();
    836	kfence_alloc_pool();
    837	report_meminit();
    838	stack_depot_early_init();
    839	mem_init();
    840	mem_init_print_info();
    841	kmem_cache_init();
    842	/*
    843	 * page_owner must be initialized after buddy is ready, and also after
    844	 * slab is ready so that stack_depot_init() works properly
    845	 */
    846	page_ext_init_flatmem_late();
    847	kmemleak_init();
    848	pgtable_init();
    849	debug_objects_mem_init();
    850	vmalloc_init();
    851	/* Should be run before the first non-init thread is created */
    852	init_espfix_bsp();
    853	/* Should be run after espfix64 is set up. */
    854	pti_init();
    855}
    856
    857#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
    858DEFINE_STATIC_KEY_MAYBE_RO(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
    859			   randomize_kstack_offset);
    860DEFINE_PER_CPU(u32, kstack_offset);
    861
    862static int __init early_randomize_kstack_offset(char *buf)
    863{
    864	int ret;
    865	bool bool_result;
    866
    867	ret = kstrtobool(buf, &bool_result);
    868	if (ret)
    869		return ret;
    870
    871	if (bool_result)
    872		static_branch_enable(&randomize_kstack_offset);
    873	else
    874		static_branch_disable(&randomize_kstack_offset);
    875	return 0;
    876}
    877early_param("randomize_kstack_offset", early_randomize_kstack_offset);
    878#endif
    879
    880void __init __weak arch_call_rest_init(void)
    881{
    882	rest_init();
    883}
    884
    885static void __init print_unknown_bootoptions(void)
    886{
    887	char *unknown_options;
    888	char *end;
    889	const char *const *p;
    890	size_t len;
    891
    892	if (panic_later || (!argv_init[1] && !envp_init[2]))
    893		return;
    894
    895	/*
    896	 * Determine how many options we have to print out, plus a space
    897	 * before each
    898	 */
    899	len = 1; /* null terminator */
    900	for (p = &argv_init[1]; *p; p++) {
    901		len++;
    902		len += strlen(*p);
    903	}
    904	for (p = &envp_init[2]; *p; p++) {
    905		len++;
    906		len += strlen(*p);
    907	}
    908
    909	unknown_options = memblock_alloc(len, SMP_CACHE_BYTES);
    910	if (!unknown_options) {
    911		pr_err("%s: Failed to allocate %zu bytes\n",
    912			__func__, len);
    913		return;
    914	}
    915	end = unknown_options;
    916
    917	for (p = &argv_init[1]; *p; p++)
    918		end += sprintf(end, " %s", *p);
    919	for (p = &envp_init[2]; *p; p++)
    920		end += sprintf(end, " %s", *p);
    921
    922	/* Start at unknown_options[1] to skip the initial space */
    923	pr_notice("Unknown kernel command line parameters \"%s\", will be passed to user space.\n",
    924		&unknown_options[1]);
    925	memblock_free(unknown_options, len);
    926}
    927
/*
 * Architecture-independent boot entry point.
 *
 * Runs the early initialization sequence with interrupts disabled, parses
 * the kernel command line, enables interrupts once the core subsystems
 * (scheduler, RCU, IRQs, timers, RNG) are set up, brings up the remaining
 * subsystems and finally hands over to arch_call_rest_init().  The order
 * of the calls below is significant; see the individual comments.
 */
asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
{
	char *command_line;
	char *after_dashes;

	set_task_stack_end_magic(&init_task);
	smp_setup_processor_id();
	debug_objects_early_init();
	init_vmlinux_build_id();

	cgroup_init_early();

	local_irq_disable();
	early_boot_irqs_disabled = true;

	/*
	 * Interrupts are still disabled. Do necessary setups, then
	 * enable them.
	 */
	boot_cpu_init();
	page_address_init();
	pr_notice("%s", linux_banner);
	early_security_init();
	setup_arch(&command_line);
	setup_boot_config();
	setup_command_line(command_line);
	setup_nr_cpu_ids();
	setup_per_cpu_areas();
	smp_prepare_boot_cpu();	/* arch-specific boot-cpu hooks */
	boot_cpu_hotplug_init();

	build_all_zonelists(NULL);
	page_alloc_init();

	pr_notice("Kernel command line: %s\n", saved_command_line);
	/* parameters may set static keys */
	jump_label_init();
	parse_early_param();
	after_dashes = parse_args("Booting kernel",
				  static_command_line, __start___param,
				  __stop___param - __start___param,
				  -1, -1, NULL, &unknown_bootoption);
	print_unknown_bootoptions();
	/* anything parse_args() returned as a valid pointer becomes init args */
	if (!IS_ERR_OR_NULL(after_dashes))
		parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
			   NULL, set_init_arg);
	if (extra_init_args)
		parse_args("Setting extra init args", extra_init_args,
			   NULL, 0, -1, -1, NULL, set_init_arg);

	/*
	 * These use large bootmem allocations and must precede
	 * kmem_cache_init()
	 */
	setup_log_buf(0);
	vfs_caches_init_early();
	sort_main_extable();
	trap_init();
	mm_init();

	ftrace_init();

	/* trace_printk can be enabled here */
	early_trace_init();

	/*
	 * Set up the scheduler prior starting any interrupts (such as the
	 * timer interrupt). Full topology setup happens at smp_init()
	 * time - but meanwhile we still have a functioning scheduler.
	 */
	sched_init();

	if (WARN(!irqs_disabled(),
		 "Interrupts were enabled *very* early, fixing it\n"))
		local_irq_disable();
	radix_tree_init();

	/*
	 * Set up housekeeping before setting up workqueues to allow the unbound
	 * workqueue to take non-housekeeping into account.
	 */
	housekeeping_init();

	/*
	 * Allow workqueue creation and work item queueing/cancelling
	 * early.  Work item execution depends on kthreads and starts after
	 * workqueue_init().
	 */
	workqueue_init_early();

	rcu_init();

	/* Trace events are available after this */
	trace_init();

	if (initcall_debug)
		initcall_debug_enable();

	context_tracking_init();
	/* init some links before init_ISA_irqs() */
	early_irq_init();
	init_IRQ();
	tick_init();
	rcu_init_nohz();
	init_timers();
	srcu_init();
	hrtimers_init();
	softirq_init();
	timekeeping_init();
	kfence_init();
	time_init();

	/*
	 * For best initial stack canary entropy, prepare it after:
	 * - setup_arch() for any UEFI RNG entropy and boot cmdline access
	 * - timekeeping_init() for ktime entropy used in random_init()
	 * - time_init() for making random_get_entropy() work on some platforms
	 * - random_init() to initialize the RNG from early entropy sources
	 */
	random_init(command_line);
	boot_init_stack_canary();

	perf_event_init();
	profile_init();
	call_function_init();
	WARN(!irqs_disabled(), "Interrupts were enabled early\n");

	early_boot_irqs_disabled = false;
	local_irq_enable();

	kmem_cache_init_late();

	/*
	 * HACK ALERT! This is early. We're enabling the console before
	 * we've done PCI setups etc, and console_init() must be aware of
	 * this. But we do want output early, in case something goes wrong.
	 */
	console_init();
	/* a deferred panic is raised only now that console_init() has run */
	if (panic_later)
		panic("Too many boot %s vars at `%s'", panic_later,
		      panic_param);

	lockdep_init();

	/*
	 * Need to run this when irqs are enabled, because it wants
	 * to self-test [hard/soft]-irqs on/off lock inversion bugs
	 * too:
	 */
	locking_selftest();

	/*
	 * This needs to be called before any devices perform DMA
	 * operations that might use the SWIOTLB bounce buffers. It will
	 * mark the bounce buffers as decrypted so that their usage will
	 * not cause "plain-text" data to be decrypted when accessed.
	 */
	mem_encrypt_init();

#ifdef CONFIG_BLK_DEV_INITRD
	/* drop the initrd if its first page lies below min_low_pfn */
	if (initrd_start && !initrd_below_start_ok &&
	    page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
		pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n",
		    page_to_pfn(virt_to_page((void *)initrd_start)),
		    min_low_pfn);
		initrd_start = 0;
	}
#endif
	setup_per_cpu_pageset();
	numa_policy_init();
	acpi_early_init();
	if (late_time_init)
		late_time_init();
	sched_clock_init();
	calibrate_delay();
	pid_idr_init();
	anon_vma_init();
#ifdef CONFIG_X86
	if (efi_enabled(EFI_RUNTIME_SERVICES))
		efi_enter_virtual_mode();
#endif
	thread_stack_cache_init();
	cred_init();
	fork_init();
	proc_caches_init();
	uts_ns_init();
	key_init();
	security_init();
	dbg_late_init();
	net_ns_init();
	vfs_caches_init();
	pagecache_init();
	signals_init();
	seq_file_init();
	proc_root_init();
	nsfs_init();
	cpuset_init();
	cgroup_init();
	taskstats_init_early();
	delayacct_init();

	poking_init();
	check_bugs();

	acpi_subsystem_init();
	arch_post_acpi_subsys_init();
	kcsan_init();

	/* Do the rest non-__init'ed, we're now alive */
	arch_call_rest_init();

	prevent_tail_call_optimization();
}
   1141
   1142/* Call all constructor functions linked into the kernel. */
   1143static void __init do_ctors(void)
   1144{
   1145/*
   1146 * For UML, the constructors have already been called by the
   1147 * normal setup code as it's just a normal ELF binary, so we
   1148 * cannot do it again - but we do need CONFIG_CONSTRUCTORS
   1149 * even on UML for modules.
   1150 */
   1151#if defined(CONFIG_CONSTRUCTORS) && !defined(CONFIG_UML)
   1152	ctor_fn_t *fn = (ctor_fn_t *) __ctors_start;
   1153
   1154	for (; fn < (ctor_fn_t *) __ctors_end; fn++)
   1155		(*fn)();
   1156#endif
   1157}
   1158
   1159#ifdef CONFIG_KALLSYMS
/* One function name taken from the "initcall_blacklist=" boot parameter. */
struct blacklist_entry {
	struct list_head next;	/* link in the blacklisted_initcalls list */
	char *buf;		/* NUL-terminated copy of the function name */
};
   1164
/* List of struct blacklist_entry, filled by initcall_blacklist(). */
static __initdata_or_module LIST_HEAD(blacklisted_initcalls);
   1166
   1167static int __init initcall_blacklist(char *str)
   1168{
   1169	char *str_entry;
   1170	struct blacklist_entry *entry;
   1171
   1172	/* str argument is a comma-separated list of functions */
   1173	do {
   1174		str_entry = strsep(&str, ",");
   1175		if (str_entry) {
   1176			pr_debug("blacklisting initcall %s\n", str_entry);
   1177			entry = memblock_alloc(sizeof(*entry),
   1178					       SMP_CACHE_BYTES);
   1179			if (!entry)
   1180				panic("%s: Failed to allocate %zu bytes\n",
   1181				      __func__, sizeof(*entry));
   1182			entry->buf = memblock_alloc(strlen(str_entry) + 1,
   1183						    SMP_CACHE_BYTES);
   1184			if (!entry->buf)
   1185				panic("%s: Failed to allocate %zu bytes\n",
   1186				      __func__, strlen(str_entry) + 1);
   1187			strcpy(entry->buf, str_entry);
   1188			list_add(&entry->next, &blacklisted_initcalls);
   1189		}
   1190	} while (str_entry);
   1191
   1192	return 1;
   1193}
   1194
   1195static bool __init_or_module initcall_blacklisted(initcall_t fn)
   1196{
   1197	struct blacklist_entry *entry;
   1198	char fn_name[KSYM_SYMBOL_LEN];
   1199	unsigned long addr;
   1200
   1201	if (list_empty(&blacklisted_initcalls))
   1202		return false;
   1203
   1204	addr = (unsigned long) dereference_function_descriptor(fn);
   1205	sprint_symbol_no_offset(fn_name, addr);
   1206
   1207	/*
   1208	 * fn will be "function_name [module_name]" where [module_name] is not
   1209	 * displayed for built-in init functions.  Strip off the [module_name].
   1210	 */
   1211	strreplace(fn_name, ' ', '\0');
   1212
   1213	list_for_each_entry(entry, &blacklisted_initcalls, next) {
   1214		if (!strcmp(fn_name, entry->buf)) {
   1215			pr_debug("initcall %s blacklisted\n", fn_name);
   1216			return true;
   1217		}
   1218	}
   1219
   1220	return false;
   1221}
   1222#else
/*
 * Variant built when CONFIG_KALLSYMS is not set: the parameter cannot be
 * honoured, so only a warning is printed.  @str is ignored.  Returns 0.
 */
static int __init initcall_blacklist(char *str)
{
	pr_warn("initcall_blacklist requires CONFIG_KALLSYMS\n");
	return 0;
}
   1228
/* Variant built when CONFIG_KALLSYMS is not set: nothing is ever blacklisted. */
static bool __init_or_module initcall_blacklisted(initcall_t fn)
{
	return false;
}
   1233#endif
/* Route the "initcall_blacklist=" boot parameter to initcall_blacklist(). */
__setup("initcall_blacklist=", initcall_blacklist);
   1235
   1236static __init_or_module void
   1237trace_initcall_start_cb(void *data, initcall_t fn)
   1238{
   1239	ktime_t *calltime = (ktime_t *)data;
   1240
   1241	printk(KERN_DEBUG "calling  %pS @ %i\n", fn, task_pid_nr(current));
   1242	*calltime = ktime_get();
   1243}
   1244
   1245static __init_or_module void
   1246trace_initcall_finish_cb(void *data, initcall_t fn, int ret)
   1247{
   1248	ktime_t rettime, *calltime = (ktime_t *)data;
   1249
   1250	rettime = ktime_get();
   1251	printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n",
   1252		 fn, ret, (unsigned long long)ktime_us_delta(rettime, *calltime));
   1253}
   1254
/* Start timestamp shared by the initcall start/finish trace callbacks. */
static ktime_t initcall_calltime;
   1256
   1257#ifdef TRACEPOINTS_ENABLED
   1258static void __init initcall_debug_enable(void)
   1259{
   1260	int ret;
   1261
   1262	ret = register_trace_initcall_start(trace_initcall_start_cb,
   1263					    &initcall_calltime);
   1264	ret |= register_trace_initcall_finish(trace_initcall_finish_cb,
   1265					      &initcall_calltime);
   1266	WARN(ret, "Failed to register initcall tracepoints\n");
   1267}
   1268# define do_trace_initcall_start	trace_initcall_start
   1269# define do_trace_initcall_finish	trace_initcall_finish
   1270#else
   1271static inline void do_trace_initcall_start(initcall_t fn)
   1272{
   1273	if (!initcall_debug)
   1274		return;
   1275	trace_initcall_start_cb(&initcall_calltime, fn);
   1276}
   1277static inline void do_trace_initcall_finish(initcall_t fn, int ret)
   1278{
   1279	if (!initcall_debug)
   1280		return;
   1281	trace_initcall_finish_cb(&initcall_calltime, fn, ret);
   1282}
   1283#endif /* !TRACEPOINTS_ENABLED */
   1284
   1285int __init_or_module do_one_initcall(initcall_t fn)
   1286{
   1287	int count = preempt_count();
   1288	char msgbuf[64];
   1289	int ret;
   1290
   1291	if (initcall_blacklisted(fn))
   1292		return -EPERM;
   1293
   1294	do_trace_initcall_start(fn);
   1295	ret = fn();
   1296	do_trace_initcall_finish(fn, ret);
   1297
   1298	msgbuf[0] = 0;
   1299
   1300	if (preempt_count() != count) {
   1301		sprintf(msgbuf, "preemption imbalance ");
   1302		preempt_count_set(count);
   1303	}
   1304	if (irqs_disabled()) {
   1305		strlcat(msgbuf, "disabled interrupts ", sizeof(msgbuf));
   1306		local_irq_enable();
   1307	}
   1308	WARN(msgbuf[0], "initcall %pS returned with %s\n", fn, msgbuf);
   1309
   1310	add_latent_entropy();
   1311	return ret;
   1312}
   1313
   1314
/*
 * Boundaries of the initcall entry arrays, defined outside this file.
 * Entries in [__initcall_start, __initcall0_start) are run by
 * do_pre_smp_initcalls(); the remaining markers delimit levels 0-7.
 */
extern initcall_entry_t __initcall_start[];
extern initcall_entry_t __initcall0_start[];
extern initcall_entry_t __initcall1_start[];
extern initcall_entry_t __initcall2_start[];
extern initcall_entry_t __initcall3_start[];
extern initcall_entry_t __initcall4_start[];
extern initcall_entry_t __initcall5_start[];
extern initcall_entry_t __initcall6_start[];
extern initcall_entry_t __initcall7_start[];
extern initcall_entry_t __initcall_end[];
   1325
/*
 * First entry of each initcall level.  The trailing __initcall_end lets
 * level N be walked as [initcall_levels[N], initcall_levels[N + 1]).
 */
static initcall_entry_t *initcall_levels[] __initdata = {
	__initcall0_start,
	__initcall1_start,
	__initcall2_start,
	__initcall3_start,
	__initcall4_start,
	__initcall5_start,
	__initcall6_start,
	__initcall7_start,
	__initcall_end,
};
   1337
/*
 * Name of each initcall level, indexed like initcall_levels[].
 * Keep these in sync with initcalls in include/linux/init.h
 */
static const char *initcall_level_names[] __initdata = {
	"pure",
	"core",
	"postcore",
	"arch",
	"subsys",
	"fs",
	"device",
	"late",
};
   1349
/*
 * Unknown-parameter callback handed to parse_args() by do_initcall_level().
 * Ignores all of its arguments and returns 0.
 */
static int __init ignore_unknown_bootoption(char *param, char *val,
			       const char *unused, void *arg)
{
	return 0;
}
   1355
   1356static void __init do_initcall_level(int level, char *command_line)
   1357{
   1358	initcall_entry_t *fn;
   1359
   1360	parse_args(initcall_level_names[level],
   1361		   command_line, __start___param,
   1362		   __stop___param - __start___param,
   1363		   level, level,
   1364		   NULL, ignore_unknown_bootoption);
   1365
   1366	trace_initcall_level(initcall_level_names[level]);
   1367	for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++)
   1368		do_one_initcall(initcall_from_entry(fn));
   1369}
   1370
   1371static void __init do_initcalls(void)
   1372{
   1373	int level;
   1374	size_t len = strlen(saved_command_line) + 1;
   1375	char *command_line;
   1376
   1377	command_line = kzalloc(len, GFP_KERNEL);
   1378	if (!command_line)
   1379		panic("%s: Failed to allocate %zu bytes\n", __func__, len);
   1380
   1381	for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++) {
   1382		/* Parser modifies command_line, restore it each time */
   1383		strcpy(command_line, saved_command_line);
   1384		do_initcall_level(level, command_line);
   1385	}
   1386
   1387	kfree(command_line);
   1388}
   1389
   1390/*
   1391 * Ok, the machine is now initialized. None of the devices
   1392 * have been touched yet, but the CPU subsystem is up and
   1393 * running, and memory and process management works.
   1394 *
   1395 * Now we can finally start doing some real work..
   1396 */
static void __init do_basic_setup(void)
{
	cpuset_init_smp();
	driver_init();
	init_irq_proc();
	/* constructors run before any of the levelled initcalls */
	do_ctors();
	do_initcalls();
}
   1405
   1406static void __init do_pre_smp_initcalls(void)
   1407{
   1408	initcall_entry_t *fn;
   1409
   1410	trace_initcall_level("early");
   1411	for (fn = __initcall_start; fn < __initcall0_start; fn++)
   1412		do_one_initcall(initcall_from_entry(fn));
   1413}
   1414
   1415static int run_init_process(const char *init_filename)
   1416{
   1417	const char *const *p;
   1418
   1419	argv_init[0] = init_filename;
   1420	pr_info("Run %s as init process\n", init_filename);
   1421	pr_debug("  with arguments:\n");
   1422	for (p = argv_init; *p; p++)
   1423		pr_debug("    %s\n", *p);
   1424	pr_debug("  with environment:\n");
   1425	for (p = envp_init; *p; p++)
   1426		pr_debug("    %s\n", *p);
   1427	return kernel_execve(init_filename, argv_init, envp_init);
   1428}
   1429
   1430static int try_to_run_init_process(const char *init_filename)
   1431{
   1432	int ret;
   1433
   1434	ret = run_init_process(init_filename);
   1435
   1436	if (ret && ret != -ENOENT) {
   1437		pr_err("Starting init: %s exists but couldn't execute it (error %d)\n",
   1438		       init_filename, ret);
   1439	}
   1440
   1441	return ret;
   1442}
   1443
   1444static noinline void __init kernel_init_freeable(void);
   1445
   1446#if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_STRICT_MODULE_RWX)
/* Defaults to true; may be overridden by the "rodata=" boot parameter. */
bool rodata_enabled __ro_after_init = true;
   1448static int __init set_debug_rodata(char *str)
   1449{
   1450	if (strtobool(str, &rodata_enabled))
   1451		pr_warn("Invalid option string for rodata: '%s'\n", str);
   1452	return 1;
   1453}
   1454__setup("rodata=", set_debug_rodata);
   1455#endif
   1456
   1457#ifdef CONFIG_STRICT_KERNEL_RWX
   1458static void mark_readonly(void)
   1459{
   1460	if (rodata_enabled) {
   1461		/*
   1462		 * load_module() results in W+X mappings, which are cleaned
   1463		 * up with call_rcu().  Let's make sure that queued work is
   1464		 * flushed so that we don't hit false positives looking for
   1465		 * insecure pages which are W+X.
   1466		 */
   1467		rcu_barrier();
   1468		mark_rodata_ro();
   1469		rodata_test();
   1470	} else
   1471		pr_info("Kernel memory protection disabled.\n");
   1472}
   1473#elif defined(CONFIG_ARCH_HAS_STRICT_KERNEL_RWX)
/*
 * Variant built when CONFIG_ARCH_HAS_STRICT_KERNEL_RWX is defined but
 * CONFIG_STRICT_KERNEL_RWX is not: only warns.
 */
static inline void mark_readonly(void)
{
	pr_warn("Kernel memory protection not selected by kernel config.\n");
}
   1478#else
/*
 * Variant built when neither CONFIG_STRICT_KERNEL_RWX nor
 * CONFIG_ARCH_HAS_STRICT_KERNEL_RWX is defined: only warns.
 */
static inline void mark_readonly(void)
{
	pr_warn("This architecture does not have kernel memory protection.\n");
}
   1483#endif
   1484
/*
 * Weak default: calls free_initmem_default() with POISON_FREE_INITMEM.
 * Being __weak, it can be replaced by a non-weak definition elsewhere.
 */
void __weak free_initmem(void)
{
	free_initmem_default(POISON_FREE_INITMEM);
}
   1489
/*
 * Finish booting and execute the first user-space program.
 *
 * @unused: not used.
 *
 * Waits for kthreadd, runs kernel_init_freeable(), releases init memory,
 * switches system_state to SYSTEM_RUNNING and then tries the init
 * candidates in order: ramdisk_execute_command, execute_command,
 * CONFIG_DEFAULT_INIT and finally a fixed list of paths.
 *
 * Returns 0 once one of them has been executed successfully; panics when
 * the requested execute_command fails or no candidate works.
 */
static int __ref kernel_init(void *unused)
{
	int ret;

	/*
	 * Wait until kthreadd is all set-up.
	 */
	wait_for_completion(&kthreadd_done);

	kernel_init_freeable();
	/* need to finish all async __init code before freeing the memory */
	async_synchronize_full();

	system_state = SYSTEM_FREEING_INITMEM;
	kprobe_free_init_mem();
	ftrace_free_init_mem();
	kgdb_free_init_mem();
	exit_boot_config();
	free_initmem();
	mark_readonly();

	/*
	 * Kernel mappings are now finalized - update the userspace page-table
	 * to finalize PTI.
	 */
	pti_finalize();

	system_state = SYSTEM_RUNNING;
	numa_default_policy();

	rcu_end_inkernel_boot();

	do_sysctl_args();

	/* a failing ramdisk init is only logged; the next candidates are tried */
	if (ramdisk_execute_command) {
		ret = run_init_process(ramdisk_execute_command);
		if (!ret)
			return 0;
		pr_err("Failed to execute %s (error %d)\n",
		       ramdisk_execute_command, ret);
	}

	/*
	 * We try each of these until one succeeds.
	 *
	 * The Bourne shell can be used instead of init if we are
	 * trying to recover a really broken machine.
	 */
	if (execute_command) {
		ret = run_init_process(execute_command);
		if (!ret)
			return 0;
		/* unlike the other candidates, failure here is fatal */
		panic("Requested init %s failed (error %d).",
		      execute_command, ret);
	}

	/* skipped when CONFIG_DEFAULT_INIT is the empty string */
	if (CONFIG_DEFAULT_INIT[0] != '\0') {
		ret = run_init_process(CONFIG_DEFAULT_INIT);
		if (ret)
			pr_err("Default init %s failed (error %d)\n",
			       CONFIG_DEFAULT_INIT, ret);
		else
			return 0;
	}

	/* || short-circuits: stops at the first path that returns 0 */
	if (!try_to_run_init_process("/sbin/init") ||
	    !try_to_run_init_process("/etc/init") ||
	    !try_to_run_init_process("/bin/init") ||
	    !try_to_run_init_process("/bin/sh"))
		return 0;

	panic("No working init found.  Try passing init= option to kernel. "
	      "See Linux Documentation/admin-guide/init.rst for guidance.");
}
   1564
   1565/* Open /dev/console, for stdin/stdout/stderr, this should never fail */
   1566void __init console_on_rootfs(void)
   1567{
   1568	struct file *file = filp_open("/dev/console", O_RDWR, 0);
   1569
   1570	if (IS_ERR(file)) {
   1571		pr_err("Warning: unable to open an initial console.\n");
   1572		return;
   1573	}
   1574	init_dup(file);
   1575	init_dup(file);
   1576	init_dup(file);
   1577	fput(file);
   1578}
   1579
/*
 * __init part of kernel_init(): brings up SMP, runs the early and levelled
 * initcalls, opens the initial console and, when no early user-space init
 * is accessible, calls prepare_namespace().  Called from kernel_init()
 * before init memory is freed.
 */
static noinline void __init kernel_init_freeable(void)
{
	/* Now the scheduler is fully set up and can do blocking allocations */
	gfp_allowed_mask = __GFP_BITS_MASK;

	/*
	 * init can allocate pages on any node
	 */
	set_mems_allowed(node_states[N_MEMORY]);

	cad_pid = get_pid(task_pid(current));

	smp_prepare_cpus(setup_max_cpus);

	workqueue_init();

	init_mm_internals();

	rcu_init_tasks_generic();
	do_pre_smp_initcalls();
	lockup_detector_init();

	smp_init();
	sched_init_smp();

	padata_init();
	page_alloc_init_late();
	/* Initialize page ext after all struct pages are initialized. */
	page_ext_init();

	do_basic_setup();

	kunit_run_all_tests();

	wait_for_initramfs();
	console_on_rootfs();

	/*
	 * check if there is an early userspace init.  If yes, let it do all
	 * the work
	 */
	if (init_eaccess(ramdisk_execute_command) != 0) {
		/* not accessible: forget it so that kernel_init() skips it */
		ramdisk_execute_command = NULL;
		prepare_namespace();
	}

	/*
	 * Ok, we have completed the initial bootup, and
	 * we're essentially up and running. Get rid of the
	 * initmem segments and start the user-mode stuff..
	 *
	 * rootfs is available now, try loading the public keys
	 * and default modules
	 */

	integrity_load_keys();
}