cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

timekeeping.c (72636B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 *  Kernel timekeeping code and accessor functions. Based on code from
      4 *  timer.c, moved in commit 8524070b7982.
      5 */
      6#include <linux/timekeeper_internal.h>
      7#include <linux/module.h>
      8#include <linux/interrupt.h>
      9#include <linux/percpu.h>
     10#include <linux/init.h>
     11#include <linux/mm.h>
     12#include <linux/nmi.h>
     13#include <linux/sched.h>
     14#include <linux/sched/loadavg.h>
     15#include <linux/sched/clock.h>
     16#include <linux/syscore_ops.h>
     17#include <linux/clocksource.h>
     18#include <linux/jiffies.h>
     19#include <linux/time.h>
     20#include <linux/timex.h>
     21#include <linux/tick.h>
     22#include <linux/stop_machine.h>
     23#include <linux/pvclock_gtod.h>
     24#include <linux/compiler.h>
     25#include <linux/audit.h>
     26
     27#include "tick-internal.h"
     28#include "ntp_internal.h"
     29#include "timekeeping_internal.h"
     30
     31#define TK_CLEAR_NTP		(1 << 0)
     32#define TK_MIRROR		(1 << 1)
     33#define TK_CLOCK_WAS_SET	(1 << 2)
     34
     35enum timekeeping_adv_mode {
     36	/* Update timekeeper when a tick has passed */
     37	TK_ADV_TICK,
     38
     39	/* Update timekeeper on a direct frequency change */
     40	TK_ADV_FREQ
     41};
     42
     43DEFINE_RAW_SPINLOCK(timekeeper_lock);
     44
     45/*
     46 * The most important data for readout fits into a single 64 byte
     47 * cache line.
     48 */
     49static struct {
     50	seqcount_raw_spinlock_t	seq;
     51	struct timekeeper	timekeeper;
     52} tk_core ____cacheline_aligned = {
     53	.seq = SEQCNT_RAW_SPINLOCK_ZERO(tk_core.seq, &timekeeper_lock),
     54};
     55
     56static struct timekeeper shadow_timekeeper;
     57
     58/* flag for if timekeeping is suspended */
     59int __read_mostly timekeeping_suspended;
     60
     61/**
     62 * struct tk_fast - NMI safe timekeeper
     63 * @seq:	Sequence counter for protecting updates. The lowest bit
     64 *		is the index for the tk_read_base array
     65 * @base:	tk_read_base array. Access is indexed by the lowest bit of
     66 *		@seq.
     67 *
     68 * See @update_fast_timekeeper() below.
     69 */
     70struct tk_fast {
     71	seqcount_latch_t	seq;
     72	struct tk_read_base	base[2];
     73};
     74
     75/* Suspend-time cycles value for halted fast timekeeper. */
     76static u64 cycles_at_suspend;
     77
     78static u64 dummy_clock_read(struct clocksource *cs)
     79{
     80	if (timekeeping_suspended)
     81		return cycles_at_suspend;
     82	return local_clock();
     83}
     84
     85static struct clocksource dummy_clock = {
     86	.read = dummy_clock_read,
     87};
     88
     89/*
     90 * Boot time initialization which allows local_clock() to be utilized
     91 * during early boot when clocksources are not available. local_clock()
     92 * returns nanoseconds already so no conversion is required, hence mult=1
     93 * and shift=0. When the first proper clocksource is installed then
     94 * the fast time keepers are updated with the correct values.
     95 */
     96#define FAST_TK_INIT						\
     97	{							\
     98		.clock		= &dummy_clock,			\
     99		.mask		= CLOCKSOURCE_MASK(64),		\
    100		.mult		= 1,				\
    101		.shift		= 0,				\
    102	}
    103
    104static struct tk_fast tk_fast_mono ____cacheline_aligned = {
    105	.seq     = SEQCNT_LATCH_ZERO(tk_fast_mono.seq),
    106	.base[0] = FAST_TK_INIT,
    107	.base[1] = FAST_TK_INIT,
    108};
    109
    110static struct tk_fast tk_fast_raw  ____cacheline_aligned = {
    111	.seq     = SEQCNT_LATCH_ZERO(tk_fast_raw.seq),
    112	.base[0] = FAST_TK_INIT,
    113	.base[1] = FAST_TK_INIT,
    114};
    115
    116static inline void tk_normalize_xtime(struct timekeeper *tk)
    117{
    118	while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) {
    119		tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
    120		tk->xtime_sec++;
    121	}
    122	while (tk->tkr_raw.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_raw.shift)) {
    123		tk->tkr_raw.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
    124		tk->raw_sec++;
    125	}
    126}
    127
    128static inline struct timespec64 tk_xtime(const struct timekeeper *tk)
    129{
    130	struct timespec64 ts;
    131
    132	ts.tv_sec = tk->xtime_sec;
    133	ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
    134	return ts;
    135}
    136
    137static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts)
    138{
    139	tk->xtime_sec = ts->tv_sec;
    140	tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift;
    141}
    142
    143static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts)
    144{
    145	tk->xtime_sec += ts->tv_sec;
    146	tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift;
    147	tk_normalize_xtime(tk);
    148}
    149
    150static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm)
    151{
    152	struct timespec64 tmp;
    153
    154	/*
    155	 * Verify consistency of: offset_real = -wall_to_monotonic
    156	 * before modifying anything
    157	 */
    158	set_normalized_timespec64(&tmp, -tk->wall_to_monotonic.tv_sec,
    159					-tk->wall_to_monotonic.tv_nsec);
    160	WARN_ON_ONCE(tk->offs_real != timespec64_to_ktime(tmp));
    161	tk->wall_to_monotonic = wtm;
    162	set_normalized_timespec64(&tmp, -wtm.tv_sec, -wtm.tv_nsec);
    163	tk->offs_real = timespec64_to_ktime(tmp);
    164	tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tk->tai_offset, 0));
    165}
    166
    167static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
    168{
    169	tk->offs_boot = ktime_add(tk->offs_boot, delta);
    170	/*
    171	 * Timespec representation for VDSO update to avoid 64bit division
    172	 * on every update.
    173	 */
    174	tk->monotonic_to_boot = ktime_to_timespec64(tk->offs_boot);
    175}
    176
    177/*
    178 * tk_clock_read - atomic clocksource read() helper
    179 *
    180 * This helper is necessary to use in the read paths because, while the
    181 * seqcount ensures we don't return a bad value while structures are updated,
    182 * it doesn't protect from potential crashes. There is the possibility that
    183 * the tkr's clocksource may change between the read reference, and the
    184 * clock reference passed to the read function.  This can cause crashes if
    185 * the wrong clocksource is passed to the wrong read function.
    186 * This isn't necessary to use when holding the timekeeper_lock or doing
    187 * a read of the fast-timekeeper tkrs (which is protected by its own locking
    188 * and update logic).
    189 */
    190static inline u64 tk_clock_read(const struct tk_read_base *tkr)
    191{
    192	struct clocksource *clock = READ_ONCE(tkr->clock);
    193
    194	return clock->read(clock);
    195}
    196
    197#ifdef CONFIG_DEBUG_TIMEKEEPING
    198#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
    199
    200static void timekeeping_check_update(struct timekeeper *tk, u64 offset)
    201{
    202
    203	u64 max_cycles = tk->tkr_mono.clock->max_cycles;
    204	const char *name = tk->tkr_mono.clock->name;
    205
    206	if (offset > max_cycles) {
    207		printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n",
    208				offset, name, max_cycles);
    209		printk_deferred("         timekeeping: Your kernel is sick, but tries to cope by capping time updates\n");
    210	} else {
    211		if (offset > (max_cycles >> 1)) {
    212			printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the '%s' clock's 50%% safety margin (%lld)\n",
    213					offset, name, max_cycles >> 1);
    214			printk_deferred("      timekeeping: Your kernel is still fine, but is feeling a bit nervous\n");
    215		}
    216	}
    217
    218	if (tk->underflow_seen) {
    219		if (jiffies - tk->last_warning > WARNING_FREQ) {
    220			printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name);
    221			printk_deferred("         Please report this, consider using a different clocksource, if possible.\n");
    222			printk_deferred("         Your kernel is probably still fine.\n");
    223			tk->last_warning = jiffies;
    224		}
    225		tk->underflow_seen = 0;
    226	}
    227
    228	if (tk->overflow_seen) {
    229		if (jiffies - tk->last_warning > WARNING_FREQ) {
    230			printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name);
    231			printk_deferred("         Please report this, consider using a different clocksource, if possible.\n");
    232			printk_deferred("         Your kernel is probably still fine.\n");
    233			tk->last_warning = jiffies;
    234		}
    235		tk->overflow_seen = 0;
    236	}
    237}
    238
    239static inline u64 timekeeping_get_delta(const struct tk_read_base *tkr)
    240{
    241	struct timekeeper *tk = &tk_core.timekeeper;
    242	u64 now, last, mask, max, delta;
    243	unsigned int seq;
    244
    245	/*
    246	 * Since we're called holding a seqcount, the data may shift
    247	 * under us while we're doing the calculation. This can cause
    248	 * false positives, since we'd note a problem but throw the
    249	 * results away. So nest another seqcount here to atomically
    250	 * grab the points we are checking with.
    251	 */
    252	do {
    253		seq = read_seqcount_begin(&tk_core.seq);
    254		now = tk_clock_read(tkr);
    255		last = tkr->cycle_last;
    256		mask = tkr->mask;
    257		max = tkr->clock->max_cycles;
    258	} while (read_seqcount_retry(&tk_core.seq, seq));
    259
    260	delta = clocksource_delta(now, last, mask);
    261
    262	/*
    263	 * Try to catch underflows by checking if we are seeing small
    264	 * mask-relative negative values.
    265	 */
    266	if (unlikely((~delta & mask) < (mask >> 3))) {
    267		tk->underflow_seen = 1;
    268		delta = 0;
    269	}
    270
    271	/* Cap delta value to the max_cycles values to avoid mult overflows */
    272	if (unlikely(delta > max)) {
    273		tk->overflow_seen = 1;
    274		delta = tkr->clock->max_cycles;
    275	}
    276
    277	return delta;
    278}
    279#else
    280static inline void timekeeping_check_update(struct timekeeper *tk, u64 offset)
    281{
    282}
    283static inline u64 timekeeping_get_delta(const struct tk_read_base *tkr)
    284{
    285	u64 cycle_now, delta;
    286
    287	/* read clocksource */
    288	cycle_now = tk_clock_read(tkr);
    289
    290	/* calculate the delta since the last update_wall_time */
    291	delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
    292
    293	return delta;
    294}
    295#endif
    296
    297/**
    298 * tk_setup_internals - Set up internals to use clocksource clock.
    299 *
    300 * @tk:		The target timekeeper to setup.
    301 * @clock:		Pointer to clocksource.
    302 *
    303 * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment
    304 * pair and interval request.
    305 *
    306 * Unless you're the timekeeping code, you should not be using this!
    307 */
    308static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
    309{
    310	u64 interval;
    311	u64 tmp, ntpinterval;
    312	struct clocksource *old_clock;
    313
    314	++tk->cs_was_changed_seq;
    315	old_clock = tk->tkr_mono.clock;
    316	tk->tkr_mono.clock = clock;
    317	tk->tkr_mono.mask = clock->mask;
    318	tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono);
    319
    320	tk->tkr_raw.clock = clock;
    321	tk->tkr_raw.mask = clock->mask;
    322	tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
    323
    324	/* Do the ns -> cycle conversion first, using original mult */
    325	tmp = NTP_INTERVAL_LENGTH;
    326	tmp <<= clock->shift;
    327	ntpinterval = tmp;
    328	tmp += clock->mult/2;
    329	do_div(tmp, clock->mult);
    330	if (tmp == 0)
    331		tmp = 1;
    332
    333	interval = (u64) tmp;
    334	tk->cycle_interval = interval;
    335
    336	/* Go back from cycles -> shifted ns */
    337	tk->xtime_interval = interval * clock->mult;
    338	tk->xtime_remainder = ntpinterval - tk->xtime_interval;
    339	tk->raw_interval = interval * clock->mult;
    340
    341	 /* if changing clocks, convert xtime_nsec shift units */
    342	if (old_clock) {
    343		int shift_change = clock->shift - old_clock->shift;
    344		if (shift_change < 0) {
    345			tk->tkr_mono.xtime_nsec >>= -shift_change;
    346			tk->tkr_raw.xtime_nsec >>= -shift_change;
    347		} else {
    348			tk->tkr_mono.xtime_nsec <<= shift_change;
    349			tk->tkr_raw.xtime_nsec <<= shift_change;
    350		}
    351	}
    352
    353	tk->tkr_mono.shift = clock->shift;
    354	tk->tkr_raw.shift = clock->shift;
    355
    356	tk->ntp_error = 0;
    357	tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
    358	tk->ntp_tick = ntpinterval << tk->ntp_error_shift;
    359
    360	/*
    361	 * The timekeeper keeps its own mult values for the currently
     362	 * active clocksource. These values will be adjusted via NTP
    363	 * to counteract clock drifting.
    364	 */
    365	tk->tkr_mono.mult = clock->mult;
    366	tk->tkr_raw.mult = clock->mult;
    367	tk->ntp_err_mult = 0;
    368	tk->skip_second_overflow = 0;
    369}
    370
    371/* Timekeeper helper functions. */
    372
    373static inline u64 timekeeping_delta_to_ns(const struct tk_read_base *tkr, u64 delta)
    374{
    375	u64 nsec;
    376
    377	nsec = delta * tkr->mult + tkr->xtime_nsec;
    378	nsec >>= tkr->shift;
    379
    380	return nsec;
    381}
    382
    383static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr)
    384{
    385	u64 delta;
    386
    387	delta = timekeeping_get_delta(tkr);
    388	return timekeeping_delta_to_ns(tkr, delta);
    389}
    390
    391static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles)
    392{
    393	u64 delta;
    394
    395	/* calculate the delta since the last update_wall_time */
    396	delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);
    397	return timekeeping_delta_to_ns(tkr, delta);
    398}
    399
    400/**
    401 * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
    402 * @tkr: Timekeeping readout base from which we take the update
    403 * @tkf: Pointer to NMI safe timekeeper
    404 *
    405 * We want to use this from any context including NMI and tracing /
    406 * instrumenting the timekeeping code itself.
    407 *
    408 * Employ the latch technique; see @raw_write_seqcount_latch.
    409 *
     410 * So if an NMI hits the update of base[0] then it will use base[1]
     411 * which is still consistent. In the worst case this can result in a
    412 * slightly wrong timestamp (a few nanoseconds). See
    413 * @ktime_get_mono_fast_ns.
    414 */
    415static void update_fast_timekeeper(const struct tk_read_base *tkr,
    416				   struct tk_fast *tkf)
    417{
    418	struct tk_read_base *base = tkf->base;
    419
    420	/* Force readers off to base[1] */
    421	raw_write_seqcount_latch(&tkf->seq);
    422
    423	/* Update base[0] */
    424	memcpy(base, tkr, sizeof(*base));
    425
    426	/* Force readers back to base[0] */
    427	raw_write_seqcount_latch(&tkf->seq);
    428
    429	/* Update base[1] */
    430	memcpy(base + 1, base, sizeof(*base));
    431}
    432
    433static __always_inline u64 fast_tk_get_delta_ns(struct tk_read_base *tkr)
    434{
    435	u64 delta, cycles = tk_clock_read(tkr);
    436
    437	delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);
    438	return timekeeping_delta_to_ns(tkr, delta);
    439}
    440
    441static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
    442{
    443	struct tk_read_base *tkr;
    444	unsigned int seq;
    445	u64 now;
    446
    447	do {
    448		seq = raw_read_seqcount_latch(&tkf->seq);
    449		tkr = tkf->base + (seq & 0x01);
    450		now = ktime_to_ns(tkr->base);
    451		now += fast_tk_get_delta_ns(tkr);
    452	} while (read_seqcount_latch_retry(&tkf->seq, seq));
    453
    454	return now;
    455}
    456
    457/**
    458 * ktime_get_mono_fast_ns - Fast NMI safe access to clock monotonic
    459 *
    460 * This timestamp is not guaranteed to be monotonic across an update.
    461 * The timestamp is calculated by:
    462 *
    463 *	now = base_mono + clock_delta * slope
    464 *
    465 * So if the update lowers the slope, readers who are forced to the
    466 * not yet updated second array are still using the old steeper slope.
    467 *
    468 * tmono
    469 * ^
    470 * |    o  n
    471 * |   o n
    472 * |  u
    473 * | o
    474 * |o
    475 * |12345678---> reader order
    476 *
    477 * o = old slope
    478 * u = update
    479 * n = new slope
    480 *
    481 * So reader 6 will observe time going backwards versus reader 5.
    482 *
    483 * While other CPUs are likely to be able to observe that, the only way
    484 * for a CPU local observation is when an NMI hits in the middle of
    485 * the update. Timestamps taken from that NMI context might be ahead
    486 * of the following timestamps. Callers need to be aware of that and
    487 * deal with it.
    488 */
    489u64 notrace ktime_get_mono_fast_ns(void)
    490{
    491	return __ktime_get_fast_ns(&tk_fast_mono);
    492}
    493EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
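/*
 * Example usage (illustrative sketch; the handler and variable below are
 * hypothetical): because the accessor above never spins on tk_core.seq, it
 * can be called from contexts where ktime_get() must not be used, e.g. NMI
 * handlers or tracing hooks.
 */
static u64 example_last_nmi_ns;

static void example_nmi_timestamp(void)
{
	/* Safe in NMI context; may be slightly non-monotonic across an update. */
	example_last_nmi_ns = ktime_get_mono_fast_ns();
}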
    494
    495/**
    496 * ktime_get_raw_fast_ns - Fast NMI safe access to clock monotonic raw
    497 *
    498 * Contrary to ktime_get_mono_fast_ns() this is always correct because the
    499 * conversion factor is not affected by NTP/PTP correction.
    500 */
    501u64 notrace ktime_get_raw_fast_ns(void)
    502{
    503	return __ktime_get_fast_ns(&tk_fast_raw);
    504}
    505EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
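/*
 * Example usage (illustrative sketch, hypothetical helper): since the raw
 * clock is not steered by NTP/PTP, deltas taken with it reflect the plain
 * clocksource rate, which is usually what NMI/tracing instrumentation wants.
 */
static u64 example_trace_delta_ns(u64 *last_ns)
{
	u64 now = ktime_get_raw_fast_ns();
	u64 delta = now - *last_ns;

	*last_ns = now;
	return delta;
}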
    506
    507/**
    508 * ktime_get_boot_fast_ns - NMI safe and fast access to boot clock.
    509 *
    510 * To keep it NMI safe since we're accessing from tracing, we're not using a
    511 * separate timekeeper with updates to monotonic clock and boot offset
    512 * protected with seqcounts. This has the following minor side effects:
    513 *
     514 * (1) It's possible that a timestamp is taken after the boot offset is updated
    515 * but before the timekeeper is updated. If this happens, the new boot offset
    516 * is added to the old timekeeping making the clock appear to update slightly
    517 * earlier:
    518 *    CPU 0                                        CPU 1
    519 *    timekeeping_inject_sleeptime64()
    520 *    __timekeeping_inject_sleeptime(tk, delta);
    521 *                                                 timestamp();
    522 *    timekeeping_update(tk, TK_CLEAR_NTP...);
    523 *
    524 * (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be
    525 * partially updated.  Since the tk->offs_boot update is a rare event, this
    526 * should be a rare occurrence which postprocessing should be able to handle.
    527 *
     528 * The caveats vs. timestamp ordering as documented for ktime_get_mono_fast_ns()
    529 * apply as well.
    530 */
    531u64 notrace ktime_get_boot_fast_ns(void)
    532{
    533	struct timekeeper *tk = &tk_core.timekeeper;
    534
    535	return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_boot)));
    536}
    537EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
    538
    539/**
    540 * ktime_get_tai_fast_ns - NMI safe and fast access to tai clock.
    541 *
    542 * The same limitations as described for ktime_get_boot_fast_ns() apply. The
    543 * mono time and the TAI offset are not read atomically which may yield wrong
     544 * readouts. However, an update of the TAI offset is a rare event, e.g. caused
    545 * by settime or adjtimex with an offset. The user of this function has to deal
    546 * with the possibility of wrong timestamps in post processing.
    547 */
    548u64 notrace ktime_get_tai_fast_ns(void)
    549{
    550	struct timekeeper *tk = &tk_core.timekeeper;
    551
    552	return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_tai)));
    553}
    554EXPORT_SYMBOL_GPL(ktime_get_tai_fast_ns);
    555
    556static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono)
    557{
    558	struct tk_read_base *tkr;
    559	u64 basem, baser, delta;
    560	unsigned int seq;
    561
    562	do {
    563		seq = raw_read_seqcount_latch(&tkf->seq);
    564		tkr = tkf->base + (seq & 0x01);
    565		basem = ktime_to_ns(tkr->base);
    566		baser = ktime_to_ns(tkr->base_real);
    567		delta = fast_tk_get_delta_ns(tkr);
    568	} while (read_seqcount_latch_retry(&tkf->seq, seq));
    569
    570	if (mono)
    571		*mono = basem + delta;
    572	return baser + delta;
    573}
    574
    575/**
     576 * ktime_get_real_fast_ns - NMI safe and fast access to clock realtime.
    577 *
     578 * See ktime_get_mono_fast_ns() for documentation of the time stamp ordering.
    579 */
    580u64 ktime_get_real_fast_ns(void)
    581{
    582	return __ktime_get_real_fast(&tk_fast_mono, NULL);
    583}
    584EXPORT_SYMBOL_GPL(ktime_get_real_fast_ns);
    585
    586/**
     587 * ktime_get_fast_timestamps - NMI safe timestamps
    588 * @snapshot:	Pointer to timestamp storage
    589 *
    590 * Stores clock monotonic, boottime and realtime timestamps.
    591 *
    592 * Boot time is a racy access on 32bit systems if the sleep time injection
    593 * happens late during resume and not in timekeeping_resume(). That could
    594 * be avoided by expanding struct tk_read_base with boot offset for 32bit
     595 * and adding more overhead to the update. As this is a hard-to-observe,
     596 * once-per-resume event which can be filtered with reasonable effort using
    597 * the accurate mono/real timestamps, it's probably not worth the trouble.
    598 *
     599 * Aside from that it might be possible on 32 and 64 bit to observe the
    600 * following when the sleep time injection happens late:
    601 *
    602 * CPU 0				CPU 1
    603 * timekeeping_resume()
    604 * ktime_get_fast_timestamps()
    605 *	mono, real = __ktime_get_real_fast()
    606 *					inject_sleep_time()
    607 *					   update boot offset
    608 *	boot = mono + bootoffset;
    609 *
    610 * That means that boot time already has the sleep time adjustment, but
    611 * real time does not. On the next readout both are in sync again.
    612 *
    613 * Preventing this for 64bit is not really feasible without destroying the
    614 * careful cache layout of the timekeeper because the sequence count and
    615 * struct tk_read_base would then need two cache lines instead of one.
    616 *
    617 * Access to the time keeper clock source is disabled across the innermost
    618 * steps of suspend/resume. The accessors still work, but the timestamps
    619 * are frozen until time keeping is resumed which happens very early.
    620 *
    621 * For regular suspend/resume there is no observable difference vs. sched
    622 * clock, but it might affect some of the nasty low level debug printks.
    623 *
    624 * OTOH, access to sched clock is not guaranteed across suspend/resume on
    625 * all systems either so it depends on the hardware in use.
    626 *
    627 * If that turns out to be a real problem then this could be mitigated by
    628 * using sched clock in a similar way as during early boot. But it's not as
    629 * trivial as on early boot because it needs some careful protection
    630 * against the clock monotonic timestamp jumping backwards on resume.
    631 */
    632void ktime_get_fast_timestamps(struct ktime_timestamps *snapshot)
    633{
    634	struct timekeeper *tk = &tk_core.timekeeper;
    635
    636	snapshot->real = __ktime_get_real_fast(&tk_fast_mono, &snapshot->mono);
    637	snapshot->boot = snapshot->mono + ktime_to_ns(data_race(tk->offs_boot));
    638}
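/*
 * Example usage (illustrative sketch, hypothetical caller): intended for
 * low level debug paths around suspend/resume where the regular accessors
 * may not be usable yet.
 */
static void example_debug_timestamps(void)
{
	struct ktime_timestamps snap;

	ktime_get_fast_timestamps(&snap);
	pr_debug("mono=%llu boot=%llu real=%llu\n",
		 snap.mono, snap.boot, snap.real);
}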
    639
    640/**
    641 * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
    642 * @tk: Timekeeper to snapshot.
    643 *
    644 * It generally is unsafe to access the clocksource after timekeeping has been
    645 * suspended, so take a snapshot of the readout base of @tk and use it as the
    646 * fast timekeeper's readout base while suspended.  It will return the same
    647 * number of cycles every time until timekeeping is resumed at which time the
    648 * proper readout base for the fast timekeeper will be restored automatically.
    649 */
    650static void halt_fast_timekeeper(const struct timekeeper *tk)
    651{
    652	static struct tk_read_base tkr_dummy;
    653	const struct tk_read_base *tkr = &tk->tkr_mono;
    654
    655	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
    656	cycles_at_suspend = tk_clock_read(tkr);
    657	tkr_dummy.clock = &dummy_clock;
    658	tkr_dummy.base_real = tkr->base + tk->offs_real;
    659	update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
    660
    661	tkr = &tk->tkr_raw;
    662	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
    663	tkr_dummy.clock = &dummy_clock;
    664	update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
    665}
    666
    667static RAW_NOTIFIER_HEAD(pvclock_gtod_chain);
    668
    669static void update_pvclock_gtod(struct timekeeper *tk, bool was_set)
    670{
    671	raw_notifier_call_chain(&pvclock_gtod_chain, was_set, tk);
    672}
    673
    674/**
    675 * pvclock_gtod_register_notifier - register a pvclock timedata update listener
    676 * @nb: Pointer to the notifier block to register
    677 */
    678int pvclock_gtod_register_notifier(struct notifier_block *nb)
    679{
    680	struct timekeeper *tk = &tk_core.timekeeper;
    681	unsigned long flags;
    682	int ret;
    683
    684	raw_spin_lock_irqsave(&timekeeper_lock, flags);
    685	ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
    686	update_pvclock_gtod(tk, true);
    687	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
    688
    689	return ret;
    690}
    691EXPORT_SYMBOL_GPL(pvclock_gtod_register_notifier);
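/*
 * Example usage (illustrative sketch, all names hypothetical): roughly how a
 * hypervisor-side consumer would hook timekeeping updates. The chain is
 * invoked with the clock-was-set state as @action and the timekeeper as
 * @priv, see update_pvclock_gtod() above.
 */
static int example_gtod_notify(struct notifier_block *nb,
			       unsigned long action, void *priv)
{
	struct timekeeper *tk = priv;

	/* Recompute paravirtualized clock data from @tk here. */
	(void)tk;
	return NOTIFY_OK;
}

static struct notifier_block example_gtod_nb = {
	.notifier_call	= example_gtod_notify,
};

/* Registration, typically from driver/module init code:
 *	pvclock_gtod_register_notifier(&example_gtod_nb);
 */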
    692
    693/**
    694 * pvclock_gtod_unregister_notifier - unregister a pvclock
    695 * timedata update listener
    696 * @nb: Pointer to the notifier block to unregister
    697 */
    698int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
    699{
    700	unsigned long flags;
    701	int ret;
    702
    703	raw_spin_lock_irqsave(&timekeeper_lock, flags);
    704	ret = raw_notifier_chain_unregister(&pvclock_gtod_chain, nb);
    705	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
    706
    707	return ret;
    708}
    709EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
    710
    711/*
    712 * tk_update_leap_state - helper to update the next_leap_ktime
    713 */
    714static inline void tk_update_leap_state(struct timekeeper *tk)
    715{
    716	tk->next_leap_ktime = ntp_get_next_leap();
    717	if (tk->next_leap_ktime != KTIME_MAX)
    718		/* Convert to monotonic time */
    719		tk->next_leap_ktime = ktime_sub(tk->next_leap_ktime, tk->offs_real);
    720}
    721
    722/*
    723 * Update the ktime_t based scalar nsec members of the timekeeper
    724 */
    725static inline void tk_update_ktime_data(struct timekeeper *tk)
    726{
    727	u64 seconds;
    728	u32 nsec;
    729
    730	/*
    731	 * The xtime based monotonic readout is:
    732	 *	nsec = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec + now();
    733	 * The ktime based monotonic readout is:
    734	 *	nsec = base_mono + now();
    735	 * ==> base_mono = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec
    736	 */
    737	seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
    738	nsec = (u32) tk->wall_to_monotonic.tv_nsec;
    739	tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
    740
    741	/*
    742	 * The sum of the nanoseconds portions of xtime and
    743	 * wall_to_monotonic can be greater/equal one second. Take
    744	 * this into account before updating tk->ktime_sec.
    745	 */
    746	nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
    747	if (nsec >= NSEC_PER_SEC)
    748		seconds++;
    749	tk->ktime_sec = seconds;
    750
    751	/* Update the monotonic raw base */
    752	tk->tkr_raw.base = ns_to_ktime(tk->raw_sec * NSEC_PER_SEC);
    753}
    754
    755/* must hold timekeeper_lock */
    756static void timekeeping_update(struct timekeeper *tk, unsigned int action)
    757{
    758	if (action & TK_CLEAR_NTP) {
    759		tk->ntp_error = 0;
    760		ntp_clear();
    761	}
    762
    763	tk_update_leap_state(tk);
    764	tk_update_ktime_data(tk);
    765
    766	update_vsyscall(tk);
    767	update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);
    768
    769	tk->tkr_mono.base_real = tk->tkr_mono.base + tk->offs_real;
    770	update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);
    771	update_fast_timekeeper(&tk->tkr_raw,  &tk_fast_raw);
    772
    773	if (action & TK_CLOCK_WAS_SET)
    774		tk->clock_was_set_seq++;
    775	/*
    776	 * The mirroring of the data to the shadow-timekeeper needs
     777	 * to happen last here to ensure we don't overwrite the
     778	 * timekeeper structure on the next update with stale data.
    779	 */
    780	if (action & TK_MIRROR)
    781		memcpy(&shadow_timekeeper, &tk_core.timekeeper,
    782		       sizeof(tk_core.timekeeper));
    783}
    784
    785/**
    786 * timekeeping_forward_now - update clock to the current time
    787 * @tk:		Pointer to the timekeeper to update
    788 *
    789 * Forward the current clock to update its state since the last call to
    790 * update_wall_time(). This is useful before significant clock changes,
    791 * as it avoids having to deal with this time offset explicitly.
    792 */
    793static void timekeeping_forward_now(struct timekeeper *tk)
    794{
    795	u64 cycle_now, delta;
    796
    797	cycle_now = tk_clock_read(&tk->tkr_mono);
    798	delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
    799	tk->tkr_mono.cycle_last = cycle_now;
    800	tk->tkr_raw.cycle_last  = cycle_now;
    801
    802	tk->tkr_mono.xtime_nsec += delta * tk->tkr_mono.mult;
    803	tk->tkr_raw.xtime_nsec += delta * tk->tkr_raw.mult;
    804
    805	tk_normalize_xtime(tk);
    806}
    807
    808/**
    809 * ktime_get_real_ts64 - Returns the time of day in a timespec64.
    810 * @ts:		pointer to the timespec to be set
    811 *
    812 * Returns the time of day in a timespec64 (WARN if suspended).
    813 */
    814void ktime_get_real_ts64(struct timespec64 *ts)
    815{
    816	struct timekeeper *tk = &tk_core.timekeeper;
    817	unsigned int seq;
    818	u64 nsecs;
    819
    820	WARN_ON(timekeeping_suspended);
    821
    822	do {
    823		seq = read_seqcount_begin(&tk_core.seq);
    824
    825		ts->tv_sec = tk->xtime_sec;
    826		nsecs = timekeeping_get_ns(&tk->tkr_mono);
    827
    828	} while (read_seqcount_retry(&tk_core.seq, seq));
    829
    830	ts->tv_nsec = 0;
    831	timespec64_add_ns(ts, nsecs);
    832}
    833EXPORT_SYMBOL(ktime_get_real_ts64);
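/*
 * Example usage (illustrative sketch, hypothetical caller). Not valid while
 * timekeeping is suspended, hence the WARN_ON() in the accessor above.
 */
static void example_log_wall_time(void)
{
	struct timespec64 now;

	ktime_get_real_ts64(&now);
	pr_info("wall clock: %lld.%09ld\n", (long long)now.tv_sec, now.tv_nsec);
}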
    834
    835ktime_t ktime_get(void)
    836{
    837	struct timekeeper *tk = &tk_core.timekeeper;
    838	unsigned int seq;
    839	ktime_t base;
    840	u64 nsecs;
    841
    842	WARN_ON(timekeeping_suspended);
    843
    844	do {
    845		seq = read_seqcount_begin(&tk_core.seq);
    846		base = tk->tkr_mono.base;
    847		nsecs = timekeeping_get_ns(&tk->tkr_mono);
    848
    849	} while (read_seqcount_retry(&tk_core.seq, seq));
    850
    851	return ktime_add_ns(base, nsecs);
    852}
    853EXPORT_SYMBOL_GPL(ktime_get);
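/*
 * Example usage (illustrative sketch, hypothetical helper): the common
 * pattern for measuring an elapsed duration against CLOCK_MONOTONIC.
 */
static s64 example_elapsed_ns(void (*workload)(void))
{
	ktime_t start = ktime_get();

	workload();
	return ktime_to_ns(ktime_sub(ktime_get(), start));
}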
    854
    855u32 ktime_get_resolution_ns(void)
    856{
    857	struct timekeeper *tk = &tk_core.timekeeper;
    858	unsigned int seq;
    859	u32 nsecs;
    860
    861	WARN_ON(timekeeping_suspended);
    862
    863	do {
    864		seq = read_seqcount_begin(&tk_core.seq);
    865		nsecs = tk->tkr_mono.mult >> tk->tkr_mono.shift;
    866	} while (read_seqcount_retry(&tk_core.seq, seq));
    867
    868	return nsecs;
    869}
    870EXPORT_SYMBOL_GPL(ktime_get_resolution_ns);
    871
    872static ktime_t *offsets[TK_OFFS_MAX] = {
    873	[TK_OFFS_REAL]	= &tk_core.timekeeper.offs_real,
    874	[TK_OFFS_BOOT]	= &tk_core.timekeeper.offs_boot,
    875	[TK_OFFS_TAI]	= &tk_core.timekeeper.offs_tai,
    876};
    877
    878ktime_t ktime_get_with_offset(enum tk_offsets offs)
    879{
    880	struct timekeeper *tk = &tk_core.timekeeper;
    881	unsigned int seq;
    882	ktime_t base, *offset = offsets[offs];
    883	u64 nsecs;
    884
    885	WARN_ON(timekeeping_suspended);
    886
    887	do {
    888		seq = read_seqcount_begin(&tk_core.seq);
    889		base = ktime_add(tk->tkr_mono.base, *offset);
    890		nsecs = timekeeping_get_ns(&tk->tkr_mono);
    891
    892	} while (read_seqcount_retry(&tk_core.seq, seq));
    893
    894	return ktime_add_ns(base, nsecs);
    895
    896}
    897EXPORT_SYMBOL_GPL(ktime_get_with_offset);
    898
    899ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs)
    900{
    901	struct timekeeper *tk = &tk_core.timekeeper;
    902	unsigned int seq;
    903	ktime_t base, *offset = offsets[offs];
    904	u64 nsecs;
    905
    906	WARN_ON(timekeeping_suspended);
    907
    908	do {
    909		seq = read_seqcount_begin(&tk_core.seq);
    910		base = ktime_add(tk->tkr_mono.base, *offset);
    911		nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
    912
    913	} while (read_seqcount_retry(&tk_core.seq, seq));
    914
    915	return ktime_add_ns(base, nsecs);
    916}
    917EXPORT_SYMBOL_GPL(ktime_get_coarse_with_offset);
    918
    919/**
    920 * ktime_mono_to_any() - convert monotonic time to any other time
    921 * @tmono:	time to convert.
    922 * @offs:	which offset to use
    923 */
    924ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs)
    925{
    926	ktime_t *offset = offsets[offs];
    927	unsigned int seq;
    928	ktime_t tconv;
    929
    930	do {
    931		seq = read_seqcount_begin(&tk_core.seq);
    932		tconv = ktime_add(tmono, *offset);
    933	} while (read_seqcount_retry(&tk_core.seq, seq));
    934
    935	return tconv;
    936}
    937EXPORT_SYMBOL_GPL(ktime_mono_to_any);
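/*
 * Example usage (illustrative sketch, hypothetical helper): translating a
 * stored CLOCK_MONOTONIC timestamp into the boottime domain.
 */
static ktime_t example_mono_to_boottime(ktime_t tmono)
{
	return ktime_mono_to_any(tmono, TK_OFFS_BOOT);
}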
    938
    939/**
    940 * ktime_get_raw - Returns the raw monotonic time in ktime_t format
    941 */
    942ktime_t ktime_get_raw(void)
    943{
    944	struct timekeeper *tk = &tk_core.timekeeper;
    945	unsigned int seq;
    946	ktime_t base;
    947	u64 nsecs;
    948
    949	do {
    950		seq = read_seqcount_begin(&tk_core.seq);
    951		base = tk->tkr_raw.base;
    952		nsecs = timekeeping_get_ns(&tk->tkr_raw);
    953
    954	} while (read_seqcount_retry(&tk_core.seq, seq));
    955
    956	return ktime_add_ns(base, nsecs);
    957}
    958EXPORT_SYMBOL_GPL(ktime_get_raw);
    959
    960/**
    961 * ktime_get_ts64 - get the monotonic clock in timespec64 format
    962 * @ts:		pointer to timespec variable
    963 *
    964 * The function calculates the monotonic clock from the realtime
    965 * clock and the wall_to_monotonic offset and stores the result
    966 * in normalized timespec64 format in the variable pointed to by @ts.
    967 */
    968void ktime_get_ts64(struct timespec64 *ts)
    969{
    970	struct timekeeper *tk = &tk_core.timekeeper;
    971	struct timespec64 tomono;
    972	unsigned int seq;
    973	u64 nsec;
    974
    975	WARN_ON(timekeeping_suspended);
    976
    977	do {
    978		seq = read_seqcount_begin(&tk_core.seq);
    979		ts->tv_sec = tk->xtime_sec;
    980		nsec = timekeeping_get_ns(&tk->tkr_mono);
    981		tomono = tk->wall_to_monotonic;
    982
    983	} while (read_seqcount_retry(&tk_core.seq, seq));
    984
    985	ts->tv_sec += tomono.tv_sec;
    986	ts->tv_nsec = 0;
    987	timespec64_add_ns(ts, nsec + tomono.tv_nsec);
    988}
    989EXPORT_SYMBOL_GPL(ktime_get_ts64);
    990
    991/**
    992 * ktime_get_seconds - Get the seconds portion of CLOCK_MONOTONIC
    993 *
     994 * Returns the seconds portion of CLOCK_MONOTONIC with a single
     995 * non-serialized read. tk->ktime_sec is of type 'unsigned long' so this
    996 * works on both 32 and 64 bit systems. On 32 bit systems the readout
    997 * covers ~136 years of uptime which should be enough to prevent
    998 * premature wrap arounds.
    999 */
   1000time64_t ktime_get_seconds(void)
   1001{
   1002	struct timekeeper *tk = &tk_core.timekeeper;
   1003
   1004	WARN_ON(timekeeping_suspended);
   1005	return tk->ktime_sec;
   1006}
   1007EXPORT_SYMBOL_GPL(ktime_get_seconds);
   1008
   1009/**
   1010 * ktime_get_real_seconds - Get the seconds portion of CLOCK_REALTIME
   1011 *
   1012 * Returns the wall clock seconds since 1970.
   1013 *
   1014 * For 64bit systems the fast access to tk->xtime_sec is preserved. On
   1015 * 32bit systems the access must be protected with the sequence
   1016 * counter to provide "atomic" access to the 64bit tk->xtime_sec
   1017 * value.
   1018 */
   1019time64_t ktime_get_real_seconds(void)
   1020{
   1021	struct timekeeper *tk = &tk_core.timekeeper;
   1022	time64_t seconds;
   1023	unsigned int seq;
   1024
   1025	if (IS_ENABLED(CONFIG_64BIT))
   1026		return tk->xtime_sec;
   1027
   1028	do {
   1029		seq = read_seqcount_begin(&tk_core.seq);
   1030		seconds = tk->xtime_sec;
   1031
   1032	} while (read_seqcount_retry(&tk_core.seq, seq));
   1033
   1034	return seconds;
   1035}
   1036EXPORT_SYMBOL_GPL(ktime_get_real_seconds);
   1037
   1038/**
   1039 * __ktime_get_real_seconds - The same as ktime_get_real_seconds
    1040 * but without the sequence counter protection. This internal function
    1041 * is called only when the timekeeping lock is already held.
   1042 */
   1043noinstr time64_t __ktime_get_real_seconds(void)
   1044{
   1045	struct timekeeper *tk = &tk_core.timekeeper;
   1046
   1047	return tk->xtime_sec;
   1048}
   1049
   1050/**
   1051 * ktime_get_snapshot - snapshots the realtime/monotonic raw clocks with counter
   1052 * @systime_snapshot:	pointer to struct receiving the system time snapshot
   1053 */
   1054void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
   1055{
   1056	struct timekeeper *tk = &tk_core.timekeeper;
   1057	unsigned int seq;
   1058	ktime_t base_raw;
   1059	ktime_t base_real;
   1060	u64 nsec_raw;
   1061	u64 nsec_real;
   1062	u64 now;
   1063
   1064	WARN_ON_ONCE(timekeeping_suspended);
   1065
   1066	do {
   1067		seq = read_seqcount_begin(&tk_core.seq);
   1068		now = tk_clock_read(&tk->tkr_mono);
   1069		systime_snapshot->cs_id = tk->tkr_mono.clock->id;
   1070		systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
   1071		systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
   1072		base_real = ktime_add(tk->tkr_mono.base,
   1073				      tk_core.timekeeper.offs_real);
   1074		base_raw = tk->tkr_raw.base;
   1075		nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, now);
   1076		nsec_raw  = timekeeping_cycles_to_ns(&tk->tkr_raw, now);
   1077	} while (read_seqcount_retry(&tk_core.seq, seq));
   1078
   1079	systime_snapshot->cycles = now;
   1080	systime_snapshot->real = ktime_add_ns(base_real, nsec_real);
   1081	systime_snapshot->raw = ktime_add_ns(base_raw, nsec_raw);
   1082}
   1083EXPORT_SYMBOL_GPL(ktime_get_snapshot);
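/*
 * Example usage (illustrative sketch, hypothetical caller): PTP-style code
 * brackets a device clock readout with two snapshots to bound the window in
 * which the device time was captured.
 */
static void example_snapshot_window(ktime_t *real_before, ktime_t *real_after)
{
	struct system_time_snapshot snap;

	ktime_get_snapshot(&snap);
	*real_before = snap.real;
	/* ... read the device clock in between ... */
	ktime_get_snapshot(&snap);
	*real_after = snap.real;
}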
   1084
   1085/* Scale base by mult/div checking for overflow */
   1086static int scale64_check_overflow(u64 mult, u64 div, u64 *base)
   1087{
   1088	u64 tmp, rem;
   1089
   1090	tmp = div64_u64_rem(*base, div, &rem);
   1091
   1092	if (((int)sizeof(u64)*8 - fls64(mult) < fls64(tmp)) ||
   1093	    ((int)sizeof(u64)*8 - fls64(mult) < fls64(rem)))
   1094		return -EOVERFLOW;
   1095	tmp *= mult;
   1096
   1097	rem = div64_u64(rem * mult, div);
   1098	*base = tmp + rem;
   1099	return 0;
   1100}
   1101
   1102/**
   1103 * adjust_historical_crosststamp - adjust crosstimestamp previous to current interval
   1104 * @history:			Snapshot representing start of history
   1105 * @partial_history_cycles:	Cycle offset into history (fractional part)
   1106 * @total_history_cycles:	Total history length in cycles
   1107 * @discontinuity:		True indicates clock was set on history period
   1108 * @ts:				Cross timestamp that should be adjusted using
   1109 *	partial/total ratio
   1110 *
   1111 * Helper function used by get_device_system_crosststamp() to correct the
   1112 * crosstimestamp corresponding to the start of the current interval to the
   1113 * system counter value (timestamp point) provided by the driver. The
   1114 * total_history_* quantities are the total history starting at the provided
   1115 * reference point and ending at the start of the current interval. The cycle
   1116 * count between the driver timestamp point and the start of the current
   1117 * interval is partial_history_cycles.
   1118 */
   1119static int adjust_historical_crosststamp(struct system_time_snapshot *history,
   1120					 u64 partial_history_cycles,
   1121					 u64 total_history_cycles,
   1122					 bool discontinuity,
   1123					 struct system_device_crosststamp *ts)
   1124{
   1125	struct timekeeper *tk = &tk_core.timekeeper;
   1126	u64 corr_raw, corr_real;
   1127	bool interp_forward;
   1128	int ret;
   1129
   1130	if (total_history_cycles == 0 || partial_history_cycles == 0)
   1131		return 0;
   1132
   1133	/* Interpolate shortest distance from beginning or end of history */
   1134	interp_forward = partial_history_cycles > total_history_cycles / 2;
   1135	partial_history_cycles = interp_forward ?
   1136		total_history_cycles - partial_history_cycles :
   1137		partial_history_cycles;
   1138
   1139	/*
   1140	 * Scale the monotonic raw time delta by:
   1141	 *	partial_history_cycles / total_history_cycles
   1142	 */
   1143	corr_raw = (u64)ktime_to_ns(
   1144		ktime_sub(ts->sys_monoraw, history->raw));
   1145	ret = scale64_check_overflow(partial_history_cycles,
   1146				     total_history_cycles, &corr_raw);
   1147	if (ret)
   1148		return ret;
   1149
   1150	/*
   1151	 * If there is a discontinuity in the history, scale monotonic raw
   1152	 *	correction by:
   1153	 *	mult(real)/mult(raw) yielding the realtime correction
   1154	 * Otherwise, calculate the realtime correction similar to monotonic
   1155	 *	raw calculation
   1156	 */
   1157	if (discontinuity) {
   1158		corr_real = mul_u64_u32_div
   1159			(corr_raw, tk->tkr_mono.mult, tk->tkr_raw.mult);
   1160	} else {
   1161		corr_real = (u64)ktime_to_ns(
   1162			ktime_sub(ts->sys_realtime, history->real));
   1163		ret = scale64_check_overflow(partial_history_cycles,
   1164					     total_history_cycles, &corr_real);
   1165		if (ret)
   1166			return ret;
   1167	}
   1168
   1169	/* Fixup monotonic raw and real time time values */
   1170	if (interp_forward) {
   1171		ts->sys_monoraw = ktime_add_ns(history->raw, corr_raw);
   1172		ts->sys_realtime = ktime_add_ns(history->real, corr_real);
   1173	} else {
   1174		ts->sys_monoraw = ktime_sub_ns(ts->sys_monoraw, corr_raw);
   1175		ts->sys_realtime = ktime_sub_ns(ts->sys_realtime, corr_real);
   1176	}
   1177
   1178	return 0;
   1179}
   1180
   1181/*
   1182 * cycle_between - true if test occurs chronologically between before and after
   1183 */
   1184static bool cycle_between(u64 before, u64 test, u64 after)
   1185{
   1186	if (test > before && test < after)
   1187		return true;
   1188	if (test < before && before > after)
   1189		return true;
   1190	return false;
   1191}
   1192
   1193/**
   1194 * get_device_system_crosststamp - Synchronously capture system/device timestamp
   1195 * @get_time_fn:	Callback to get simultaneous device time and
   1196 *	system counter from the device driver
   1197 * @ctx:		Context passed to get_time_fn()
   1198 * @history_begin:	Historical reference point used to interpolate system
   1199 *	time when counter provided by the driver is before the current interval
   1200 * @xtstamp:		Receives simultaneously captured system and device time
   1201 *
   1202 * Reads a timestamp from a device and correlates it to system time
   1203 */
   1204int get_device_system_crosststamp(int (*get_time_fn)
   1205				  (ktime_t *device_time,
   1206				   struct system_counterval_t *sys_counterval,
   1207				   void *ctx),
   1208				  void *ctx,
   1209				  struct system_time_snapshot *history_begin,
   1210				  struct system_device_crosststamp *xtstamp)
   1211{
   1212	struct system_counterval_t system_counterval;
   1213	struct timekeeper *tk = &tk_core.timekeeper;
   1214	u64 cycles, now, interval_start;
   1215	unsigned int clock_was_set_seq = 0;
   1216	ktime_t base_real, base_raw;
   1217	u64 nsec_real, nsec_raw;
   1218	u8 cs_was_changed_seq;
   1219	unsigned int seq;
   1220	bool do_interp;
   1221	int ret;
   1222
   1223	do {
   1224		seq = read_seqcount_begin(&tk_core.seq);
   1225		/*
   1226		 * Try to synchronously capture device time and a system
   1227		 * counter value calling back into the device driver
   1228		 */
   1229		ret = get_time_fn(&xtstamp->device, &system_counterval, ctx);
   1230		if (ret)
   1231			return ret;
   1232
   1233		/*
   1234		 * Verify that the clocksource associated with the captured
   1235		 * system counter value is the same as the currently installed
   1236		 * timekeeper clocksource
   1237		 */
   1238		if (tk->tkr_mono.clock != system_counterval.cs)
   1239			return -ENODEV;
   1240		cycles = system_counterval.cycles;
   1241
   1242		/*
   1243		 * Check whether the system counter value provided by the
   1244		 * device driver is on the current timekeeping interval.
   1245		 */
   1246		now = tk_clock_read(&tk->tkr_mono);
   1247		interval_start = tk->tkr_mono.cycle_last;
   1248		if (!cycle_between(interval_start, cycles, now)) {
   1249			clock_was_set_seq = tk->clock_was_set_seq;
   1250			cs_was_changed_seq = tk->cs_was_changed_seq;
   1251			cycles = interval_start;
   1252			do_interp = true;
   1253		} else {
   1254			do_interp = false;
   1255		}
   1256
   1257		base_real = ktime_add(tk->tkr_mono.base,
   1258				      tk_core.timekeeper.offs_real);
   1259		base_raw = tk->tkr_raw.base;
   1260
   1261		nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono,
   1262						     system_counterval.cycles);
   1263		nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw,
   1264						    system_counterval.cycles);
   1265	} while (read_seqcount_retry(&tk_core.seq, seq));
   1266
   1267	xtstamp->sys_realtime = ktime_add_ns(base_real, nsec_real);
   1268	xtstamp->sys_monoraw = ktime_add_ns(base_raw, nsec_raw);
   1269
   1270	/*
   1271	 * Interpolate if necessary, adjusting back from the start of the
   1272	 * current interval
   1273	 */
   1274	if (do_interp) {
   1275		u64 partial_history_cycles, total_history_cycles;
   1276		bool discontinuity;
   1277
   1278		/*
   1279		 * Check that the counter value occurs after the provided
   1280		 * history reference and that the history doesn't cross a
   1281		 * clocksource change
   1282		 */
   1283		if (!history_begin ||
   1284		    !cycle_between(history_begin->cycles,
   1285				   system_counterval.cycles, cycles) ||
   1286		    history_begin->cs_was_changed_seq != cs_was_changed_seq)
   1287			return -EINVAL;
   1288		partial_history_cycles = cycles - system_counterval.cycles;
   1289		total_history_cycles = cycles - history_begin->cycles;
   1290		discontinuity =
   1291			history_begin->clock_was_set_seq != clock_was_set_seq;
   1292
   1293		ret = adjust_historical_crosststamp(history_begin,
   1294						    partial_history_cycles,
   1295						    total_history_cycles,
   1296						    discontinuity, xtstamp);
   1297		if (ret)
   1298			return ret;
   1299	}
   1300
   1301	return 0;
   1302}
   1303EXPORT_SYMBOL_GPL(get_device_system_crosststamp);
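/*
 * Example usage (illustrative sketch, all names hypothetical): a driver
 * supplies a callback which latches the device clock together with a value
 * of the current timekeeping clocksource, typically done in hardware by
 * PTP capable NICs.
 */
static int example_get_time_fn(ktime_t *device_time,
			       struct system_counterval_t *sys_counterval,
			       void *ctx)
{
	/*
	 * A real driver would fill in:
	 *	*device_time		- device clock, latched in hardware
	 *	sys_counterval->cs	- clocksource the cycles refer to
	 *	sys_counterval->cycles	- latched system counter value
	 */
	return -EOPNOTSUPP;
}

/* A caller would then do, roughly:
 *	struct system_device_crosststamp xt;
 *	int err = get_device_system_crosststamp(example_get_time_fn, dev,
 *						NULL, &xt);
 */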
   1304
   1305/**
   1306 * do_settimeofday64 - Sets the time of day.
   1307 * @ts:     pointer to the timespec64 variable containing the new time
   1308 *
    1309 * Sets the time of day to the new time, updates NTP and notifies hrtimers.
   1310 */
   1311int do_settimeofday64(const struct timespec64 *ts)
   1312{
   1313	struct timekeeper *tk = &tk_core.timekeeper;
   1314	struct timespec64 ts_delta, xt;
   1315	unsigned long flags;
   1316	int ret = 0;
   1317
   1318	if (!timespec64_valid_settod(ts))
   1319		return -EINVAL;
   1320
   1321	raw_spin_lock_irqsave(&timekeeper_lock, flags);
   1322	write_seqcount_begin(&tk_core.seq);
   1323
   1324	timekeeping_forward_now(tk);
   1325
   1326	xt = tk_xtime(tk);
   1327	ts_delta = timespec64_sub(*ts, xt);
   1328
   1329	if (timespec64_compare(&tk->wall_to_monotonic, &ts_delta) > 0) {
   1330		ret = -EINVAL;
   1331		goto out;
   1332	}
   1333
   1334	tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, ts_delta));
   1335
   1336	tk_set_xtime(tk, ts);
   1337out:
   1338	timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
   1339
   1340	write_seqcount_end(&tk_core.seq);
   1341	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
   1342
   1343	/* Signal hrtimers about time change */
   1344	clock_was_set(CLOCK_SET_WALL);
   1345
   1346	if (!ret)
   1347		audit_tk_injoffset(ts_delta);
   1348
   1349	return ret;
   1350}
   1351EXPORT_SYMBOL(do_settimeofday64);
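/*
 * Example usage (illustrative sketch, hypothetical helper): e.g. an RTC
 * driver restoring wall time at boot. Returns -EINVAL for values that are
 * not settable (see timespec64_valid_settod() and the wall_to_monotonic
 * check above).
 */
static int example_set_wall_time(time64_t secs)
{
	struct timespec64 ts = { .tv_sec = secs, .tv_nsec = 0 };

	return do_settimeofday64(&ts);
}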
   1352
   1353/**
   1354 * timekeeping_inject_offset - Adds or subtracts from the current time.
   1355 * @ts:		Pointer to the timespec variable containing the offset
   1356 *
   1357 * Adds or subtracts an offset value from the current time.
   1358 */
   1359static int timekeeping_inject_offset(const struct timespec64 *ts)
   1360{
   1361	struct timekeeper *tk = &tk_core.timekeeper;
   1362	unsigned long flags;
   1363	struct timespec64 tmp;
   1364	int ret = 0;
   1365
   1366	if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC)
   1367		return -EINVAL;
   1368
   1369	raw_spin_lock_irqsave(&timekeeper_lock, flags);
   1370	write_seqcount_begin(&tk_core.seq);
   1371
   1372	timekeeping_forward_now(tk);
   1373
   1374	/* Make sure the proposed value is valid */
   1375	tmp = timespec64_add(tk_xtime(tk), *ts);
   1376	if (timespec64_compare(&tk->wall_to_monotonic, ts) > 0 ||
   1377	    !timespec64_valid_settod(&tmp)) {
   1378		ret = -EINVAL;
   1379		goto error;
   1380	}
   1381
   1382	tk_xtime_add(tk, ts);
   1383	tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *ts));
   1384
   1385error: /* even if we error out, we forwarded the time, so call update */
   1386	timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
   1387
   1388	write_seqcount_end(&tk_core.seq);
   1389	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
   1390
   1391	/* Signal hrtimers about time change */
   1392	clock_was_set(CLOCK_SET_WALL);
   1393
   1394	return ret;
   1395}
   1396
   1397/*
   1398 * Indicates if there is an offset between the system clock and the hardware
   1399 * clock/persistent clock/rtc.
   1400 */
   1401int persistent_clock_is_local;
   1402
   1403/*
   1404 * Adjust the time obtained from the CMOS to be UTC time instead of
   1405 * local time.
   1406 *
   1407 * This is ugly, but preferable to the alternatives.  Otherwise we
   1408 * would either need to write a program to do it in /etc/rc (and risk
   1409 * confusion if the program gets run more than once; it would also be
   1410 * hard to make the program warp the clock precisely n hours)  or
   1411 * compile in the timezone information into the kernel.  Bad, bad....
   1412 *
   1413 *						- TYT, 1992-01-01
   1414 *
   1415 * The best thing to do is to keep the CMOS clock in universal time (UTC)
   1416 * as real UNIX machines always do it. This avoids all headaches about
   1417 * daylight saving times and warping kernel clocks.
   1418 */
   1419void timekeeping_warp_clock(void)
   1420{
   1421	if (sys_tz.tz_minuteswest != 0) {
   1422		struct timespec64 adjust;
   1423
   1424		persistent_clock_is_local = 1;
   1425		adjust.tv_sec = sys_tz.tz_minuteswest * 60;
   1426		adjust.tv_nsec = 0;
   1427		timekeeping_inject_offset(&adjust);
   1428	}
   1429}
   1430
   1431/*
   1432 * __timekeeping_set_tai_offset - Sets the TAI offset from UTC and monotonic
   1433 */
   1434static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset)
   1435{
   1436	tk->tai_offset = tai_offset;
   1437	tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tai_offset, 0));
   1438}
   1439
   1440/*
   1441 * change_clocksource - Swaps clocksources if a new one is available
   1442 *
   1443 * Accumulates current time interval and initializes new clocksource
   1444 */
   1445static int change_clocksource(void *data)
   1446{
   1447	struct timekeeper *tk = &tk_core.timekeeper;
   1448	struct clocksource *new, *old = NULL;
   1449	unsigned long flags;
   1450	bool change = false;
   1451
   1452	new = (struct clocksource *) data;
   1453
   1454	/*
    1455	 * If the cs is in a module, get a module reference. Succeeds
   1456	 * for built-in code (owner == NULL) as well.
   1457	 */
   1458	if (try_module_get(new->owner)) {
   1459		if (!new->enable || new->enable(new) == 0)
   1460			change = true;
   1461		else
   1462			module_put(new->owner);
   1463	}
   1464
   1465	raw_spin_lock_irqsave(&timekeeper_lock, flags);
   1466	write_seqcount_begin(&tk_core.seq);
   1467
   1468	timekeeping_forward_now(tk);
   1469
   1470	if (change) {
   1471		old = tk->tkr_mono.clock;
   1472		tk_setup_internals(tk, new);
   1473	}
   1474
   1475	timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
   1476
   1477	write_seqcount_end(&tk_core.seq);
   1478	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
   1479
   1480	if (old) {
   1481		if (old->disable)
   1482			old->disable(old);
   1483
   1484		module_put(old->owner);
   1485	}
   1486
   1487	return 0;
   1488}
   1489
   1490/**
   1491 * timekeeping_notify - Install a new clock source
   1492 * @clock:		pointer to the clock source
   1493 *
   1494 * This function is called from clocksource.c after a new, better clock
   1495 * source has been registered. The caller holds the clocksource_mutex.
   1496 */
   1497int timekeeping_notify(struct clocksource *clock)
   1498{
   1499	struct timekeeper *tk = &tk_core.timekeeper;
   1500
   1501	if (tk->tkr_mono.clock == clock)
   1502		return 0;
   1503	stop_machine(change_clocksource, clock, NULL);
   1504	tick_clock_notify();
   1505	return tk->tkr_mono.clock == clock ? 0 : -1;
   1506}
   1507
   1508/**
   1509 * ktime_get_raw_ts64 - Returns the raw monotonic time in a timespec
   1510 * @ts:		pointer to the timespec64 to be set
   1511 *
   1512 * Returns the raw monotonic time (completely un-modified by ntp)
   1513 */
   1514void ktime_get_raw_ts64(struct timespec64 *ts)
   1515{
   1516	struct timekeeper *tk = &tk_core.timekeeper;
   1517	unsigned int seq;
   1518	u64 nsecs;
   1519
   1520	do {
   1521		seq = read_seqcount_begin(&tk_core.seq);
   1522		ts->tv_sec = tk->raw_sec;
   1523		nsecs = timekeeping_get_ns(&tk->tkr_raw);
   1524
   1525	} while (read_seqcount_retry(&tk_core.seq, seq));
   1526
   1527	ts->tv_nsec = 0;
   1528	timespec64_add_ns(ts, nsecs);
   1529}
   1530EXPORT_SYMBOL(ktime_get_raw_ts64);
   1531
   1532
   1533/**
   1534 * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres
   1535 */
   1536int timekeeping_valid_for_hres(void)
   1537{
   1538	struct timekeeper *tk = &tk_core.timekeeper;
   1539	unsigned int seq;
   1540	int ret;
   1541
   1542	do {
   1543		seq = read_seqcount_begin(&tk_core.seq);
   1544
   1545		ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
   1546
   1547	} while (read_seqcount_retry(&tk_core.seq, seq));
   1548
   1549	return ret;
   1550}
   1551
   1552/**
   1553 * timekeeping_max_deferment - Returns max time the clocksource can be deferred
   1554 */
   1555u64 timekeeping_max_deferment(void)
   1556{
   1557	struct timekeeper *tk = &tk_core.timekeeper;
   1558	unsigned int seq;
   1559	u64 ret;
   1560
   1561	do {
   1562		seq = read_seqcount_begin(&tk_core.seq);
   1563
   1564		ret = tk->tkr_mono.clock->max_idle_ns;
   1565
   1566	} while (read_seqcount_retry(&tk_core.seq, seq));
   1567
   1568	return ret;
   1569}
   1570
   1571/**
   1572 * read_persistent_clock64 -  Return time from the persistent clock.
   1573 * @ts: Pointer to the storage for the readout value
   1574 *
   1575 * Weak dummy function for arches that do not yet support it.
   1576 * Reads the time from the battery backed persistent clock.
   1577 * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported.
   1578 *
   1579 *  XXX - Do be sure to remove it once all arches implement it.
   1580 */
   1581void __weak read_persistent_clock64(struct timespec64 *ts)
   1582{
   1583	ts->tv_sec = 0;
   1584	ts->tv_nsec = 0;
   1585}
   1586
   1587/**
    1588 * read_persistent_wall_and_boot_offset - Read persistent clock, and also the
    1589 *                                        offset from boot.
    1590 * @wall_time:	current time as returned by the persistent clock
    1591 * @boot_offset: offset that is defined as wall_time - boot_time
    1592 *
    1593 * Weak dummy function for arches that do not yet support it.
   1594 *
   1595 * The default function calculates offset based on the current value of
   1596 * local_clock(). This way architectures that support sched_clock() but don't
    1597 * have a dedicated boot time clock will still provide the best estimate of
    1598 * the boot time.
   1599 */
   1600void __weak __init
   1601read_persistent_wall_and_boot_offset(struct timespec64 *wall_time,
   1602				     struct timespec64 *boot_offset)
   1603{
   1604	read_persistent_clock64(wall_time);
   1605	*boot_offset = ns_to_timespec64(local_clock());
   1606}
   1607
   1608/*
   1609 * Flag reflecting whether timekeeping_resume() has injected sleeptime.
   1610 *
    1611 * The flag starts off false and is only set when a suspend reaches
    1612 * timekeeping_suspend(). timekeeping_resume() clears it again if the
    1613 * timekeeper clocksource did not stop across the suspend and was used
    1614 * to update the sleep time. If the timekeeper clocksource did stop,
    1615 * the flag stays true and is used by the RTC resume code to decide
    1616 * whether sleeptime must be injected; injecting it clears the flag.
   1617 *
   1618 * If a suspend fails before reaching timekeeping_resume() then the flag
   1619 * stays false and prevents erroneous sleeptime injection.
   1620 */
   1621static bool suspend_timing_needed;
   1622
    1623/* Flag indicating whether there is a persistent clock on this platform */
   1624static bool persistent_clock_exists;
   1625
   1626/*
   1627 * timekeeping_init - Initializes the clocksource and common timekeeping values
   1628 */
   1629void __init timekeeping_init(void)
   1630{
   1631	struct timespec64 wall_time, boot_offset, wall_to_mono;
   1632	struct timekeeper *tk = &tk_core.timekeeper;
   1633	struct clocksource *clock;
   1634	unsigned long flags;
   1635
   1636	read_persistent_wall_and_boot_offset(&wall_time, &boot_offset);
   1637	if (timespec64_valid_settod(&wall_time) &&
   1638	    timespec64_to_ns(&wall_time) > 0) {
   1639		persistent_clock_exists = true;
   1640	} else if (timespec64_to_ns(&wall_time) != 0) {
   1641		pr_warn("Persistent clock returned invalid value");
   1642		wall_time = (struct timespec64){0};
   1643	}
   1644
   1645	if (timespec64_compare(&wall_time, &boot_offset) < 0)
   1646		boot_offset = (struct timespec64){0};
   1647
   1648	/*
    1649	 * We want to set wall_to_mono, so the following is true:
   1650	 * wall time + wall_to_mono = boot time
   1651	 */
   1652	wall_to_mono = timespec64_sub(boot_offset, wall_time);
   1653
   1654	raw_spin_lock_irqsave(&timekeeper_lock, flags);
   1655	write_seqcount_begin(&tk_core.seq);
   1656	ntp_init();
   1657
   1658	clock = clocksource_default_clock();
   1659	if (clock->enable)
   1660		clock->enable(clock);
   1661	tk_setup_internals(tk, clock);
   1662
   1663	tk_set_xtime(tk, &wall_time);
   1664	tk->raw_sec = 0;
   1665
   1666	tk_set_wall_to_mono(tk, wall_to_mono);
   1667
   1668	timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
   1669
   1670	write_seqcount_end(&tk_core.seq);
   1671	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
   1672}
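
/*
 * Worked example (hypothetical numbers) for the wall_to_mono setup above:
 * if the persistent clock reports wall_time = 1000000000s at init and
 * local_clock() says the CPU has run for 5s, the default
 * read_persistent_wall_and_boot_offset() returns boot_offset = 5s, so
 *
 *	wall_to_mono = boot_offset - wall_time = 5 - 1000000000 = -999999995s
 *
 * and CLOCK_MONOTONIC starts at wall_time + wall_to_mono = 5s, i.e. it
 * accounts for the time already spent in early boot.
 */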
   1673
   1674/* time in seconds when suspend began for persistent clock */
   1675static struct timespec64 timekeeping_suspend_time;
   1676
   1677/**
   1678 * __timekeeping_inject_sleeptime - Internal function to add sleep interval
   1679 * @tk:		Pointer to the timekeeper to be updated
   1680 * @delta:	Pointer to the delta value in timespec64 format
   1681 *
   1682 * Takes a timespec offset measuring a suspend interval and properly
   1683 * adds the sleep offset to the timekeeping variables.
   1684 */
   1685static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
   1686					   const struct timespec64 *delta)
   1687{
   1688	if (!timespec64_valid_strict(delta)) {
   1689		printk_deferred(KERN_WARNING
   1690				"__timekeeping_inject_sleeptime: Invalid "
   1691				"sleep delta value!\n");
   1692		return;
   1693	}
   1694	tk_xtime_add(tk, delta);
   1695	tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *delta));
   1696	tk_update_sleep_time(tk, timespec64_to_ktime(*delta));
   1697	tk_debug_account_sleep_time(delta);
   1698}
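
/*
 * Worked example (hypothetical numbers) for the helper above: injecting a
 * delta of 600s moves xtime forward by 600s and wall_to_monotonic back by
 * 600s, so CLOCK_REALTIME jumps ahead by the sleep time while
 * CLOCK_MONOTONIC does not move; only CLOCK_BOOTTIME, via
 * tk_update_sleep_time(), accounts for the 600s spent suspended.
 */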
   1699
   1700#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE)
    1701/*
    1702 * We have three kinds of time sources to use for sleep time
    1703 * injection, in order of preference:
    1704 * 1) non-stop clocksource
    1705 * 2) persistent clock (i.e. an RTC accessible when irqs are off)
    1706 * 3) RTC
    1707 *
    1708 * 1) and 2) are used by timekeeping, 3) by the RTC subsystem.
    1709 * If the system has neither 1) nor 2), 3) is used as the fallback.
    1710 *
    1711 * If timekeeping has already injected sleeptime via either 1) or 2),
    1712 * 3) becomes unnecessary, so in that case rtc_resume() does not need
    1713 * to inject it again. That is what timekeeping_rtc_skipresume()
    1714 * indicates.
    1715 */
   1717bool timekeeping_rtc_skipresume(void)
   1718{
   1719	return !suspend_timing_needed;
   1720}
   1721
    1722/*
    1723 * Whether 1) will be used can only be determined in
    1724 * timekeeping_resume(), which is invoked after rtc_suspend(), so we
    1725 * cannot reliably skip rtc_suspend() just because the system has 1).
    1726 *
    1727 * But if the system has 2), 2) will definitely be used, so in that
    1728 * case rtc_suspend() is not needed, and that is what
    1729 * timekeeping_rtc_skipsuspend() indicates.
    1730 */
   1731bool timekeeping_rtc_skipsuspend(void)
   1732{
   1733	return persistent_clock_exists;
   1734}
   1735
   1736/**
    1737 * timekeeping_inject_sleeptime64 - Adds suspend interval to timekeeping values
   1738 * @delta: pointer to a timespec64 delta value
   1739 *
   1740 * This hook is for architectures that cannot support read_persistent_clock64
    1741 * because their RTC/persistent clock is only accessible when irqs are enabled,
    1742 * and they also don't have an effective nonstop clocksource.
   1743 *
   1744 * This function should only be called by rtc_resume(), and allows
   1745 * a suspend offset to be injected into the timekeeping values.
   1746 */
   1747void timekeeping_inject_sleeptime64(const struct timespec64 *delta)
   1748{
   1749	struct timekeeper *tk = &tk_core.timekeeper;
   1750	unsigned long flags;
   1751
   1752	raw_spin_lock_irqsave(&timekeeper_lock, flags);
   1753	write_seqcount_begin(&tk_core.seq);
   1754
   1755	suspend_timing_needed = false;
   1756
   1757	timekeeping_forward_now(tk);
   1758
   1759	__timekeeping_inject_sleeptime(tk, delta);
   1760
   1761	timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
   1762
   1763	write_seqcount_end(&tk_core.seq);
   1764	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
   1765
   1766	/* Signal hrtimers about time change */
   1767	clock_was_set(CLOCK_SET_WALL | CLOCK_SET_BOOT);
   1768}
   1769#endif
   1770
   1771/**
   1772 * timekeeping_resume - Resumes the generic timekeeping subsystem.
   1773 */
   1774void timekeeping_resume(void)
   1775{
   1776	struct timekeeper *tk = &tk_core.timekeeper;
   1777	struct clocksource *clock = tk->tkr_mono.clock;
   1778	unsigned long flags;
   1779	struct timespec64 ts_new, ts_delta;
   1780	u64 cycle_now, nsec;
   1781	bool inject_sleeptime = false;
   1782
   1783	read_persistent_clock64(&ts_new);
   1784
   1785	clockevents_resume();
   1786	clocksource_resume();
   1787
   1788	raw_spin_lock_irqsave(&timekeeper_lock, flags);
   1789	write_seqcount_begin(&tk_core.seq);
   1790
   1791	/*
    1792	 * After the system resumes, we need to calculate the time spent
    1793	 * suspended and compensate the OS time for it. There are 3 sources
    1794	 * that could be used: a nonstop clocksource running across suspend,
    1795	 * the persistent clock and the rtc device.
    1796	 *
    1797	 * A given platform may have one, two or all of them, and the
    1798	 * preference is:
    1799	 *	suspend-nonstop clocksource -> persistent clock -> rtc
    1800	 * A less preferred source is only tried if there is no better usable
    1801	 * one. The rtc part is handled separately in the rtc core code.
   1802	 */
   1803	cycle_now = tk_clock_read(&tk->tkr_mono);
   1804	nsec = clocksource_stop_suspend_timing(clock, cycle_now);
   1805	if (nsec > 0) {
   1806		ts_delta = ns_to_timespec64(nsec);
   1807		inject_sleeptime = true;
   1808	} else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) {
   1809		ts_delta = timespec64_sub(ts_new, timekeeping_suspend_time);
   1810		inject_sleeptime = true;
   1811	}
   1812
   1813	if (inject_sleeptime) {
   1814		suspend_timing_needed = false;
   1815		__timekeeping_inject_sleeptime(tk, &ts_delta);
   1816	}
   1817
   1818	/* Re-base the last cycle value */
   1819	tk->tkr_mono.cycle_last = cycle_now;
   1820	tk->tkr_raw.cycle_last  = cycle_now;
   1821
   1822	tk->ntp_error = 0;
   1823	timekeeping_suspended = 0;
   1824	timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
   1825	write_seqcount_end(&tk_core.seq);
   1826	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
   1827
   1828	touch_softlockup_watchdog();
   1829
   1830	/* Resume the clockevent device(s) and hrtimers */
   1831	tick_resume();
   1832	/* Notify timerfd as resume is equivalent to clock_was_set() */
   1833	timerfd_resume();
   1834}
   1835
   1836int timekeeping_suspend(void)
   1837{
   1838	struct timekeeper *tk = &tk_core.timekeeper;
   1839	unsigned long flags;
   1840	struct timespec64		delta, delta_delta;
   1841	static struct timespec64	old_delta;
   1842	struct clocksource *curr_clock;
   1843	u64 cycle_now;
   1844
   1845	read_persistent_clock64(&timekeeping_suspend_time);
   1846
   1847	/*
    1848	 * On some systems the persistent_clock cannot be detected in
    1849	 * timekeeping_init() from its return value, so if we see a valid
    1850	 * value returned, update the persistent_clock_exists flag.
   1851	 */
   1852	if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec)
   1853		persistent_clock_exists = true;
   1854
   1855	suspend_timing_needed = true;
   1856
   1857	raw_spin_lock_irqsave(&timekeeper_lock, flags);
   1858	write_seqcount_begin(&tk_core.seq);
   1859	timekeeping_forward_now(tk);
   1860	timekeeping_suspended = 1;
   1861
   1862	/*
   1863	 * Since we've called forward_now, cycle_last stores the value
   1864	 * just read from the current clocksource. Save this to potentially
   1865	 * use in suspend timing.
   1866	 */
   1867	curr_clock = tk->tkr_mono.clock;
   1868	cycle_now = tk->tkr_mono.cycle_last;
   1869	clocksource_start_suspend_timing(curr_clock, cycle_now);
   1870
   1871	if (persistent_clock_exists) {
   1872		/*
    1873		 * each of which can add ~1 second of drift error,
    1874		 * try to compensate so that the difference between system time
    1875		 * and persistent_clock time stays close to constant.
   1876		 * and persistent_clock time stays close to constant.
   1877		 */
   1878		delta = timespec64_sub(tk_xtime(tk), timekeeping_suspend_time);
   1879		delta_delta = timespec64_sub(delta, old_delta);
   1880		if (abs(delta_delta.tv_sec) >= 2) {
   1881			/*
   1882			 * if delta_delta is too large, assume time correction
   1883			 * has occurred and set old_delta to the current delta.
   1884			 */
   1885			old_delta = delta;
   1886		} else {
    1887			/* Otherwise adjust the recorded suspend time to compensate */
   1888			timekeeping_suspend_time =
   1889				timespec64_add(timekeeping_suspend_time, delta_delta);
   1890		}
   1891	}
   1892
   1893	timekeeping_update(tk, TK_MIRROR);
   1894	halt_fast_timekeeper(tk);
   1895	write_seqcount_end(&tk_core.seq);
   1896	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
   1897
   1898	tick_suspend();
   1899	clocksource_suspend();
   1900	clockevents_suspend();
   1901
   1902	return 0;
   1903}
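
/*
 * Worked example (hypothetical numbers) for the drift compensation above:
 * if the system clock reads 5000.7s while the persistent clock reads
 * 5000.0s at this suspend, delta = 0.7s. If the previous suspend saw
 * delta = 0.3s (old_delta), delta_delta = 0.4s: the clocks drifted apart
 * by 0.4s while running. Adding delta_delta to timekeeping_suspend_time
 * makes the sleep interval computed at resume 0.4s shorter, so the
 * system-to-persistent-clock difference stays near 0.3s instead of
 * growing with every suspend/resume cycle.
 */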
   1904
    1905/* syscore resume/suspend bits for timekeeping */
   1906static struct syscore_ops timekeeping_syscore_ops = {
   1907	.resume		= timekeeping_resume,
   1908	.suspend	= timekeeping_suspend,
   1909};
   1910
   1911static int __init timekeeping_init_ops(void)
   1912{
   1913	register_syscore_ops(&timekeeping_syscore_ops);
   1914	return 0;
   1915}
   1916device_initcall(timekeeping_init_ops);
   1917
   1918/*
   1919 * Apply a multiplier adjustment to the timekeeper
   1920 */
   1921static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
   1922							 s64 offset,
   1923							 s32 mult_adj)
   1924{
   1925	s64 interval = tk->cycle_interval;
   1926
   1927	if (mult_adj == 0) {
   1928		return;
   1929	} else if (mult_adj == -1) {
   1930		interval = -interval;
   1931		offset = -offset;
   1932	} else if (mult_adj != 1) {
   1933		interval *= mult_adj;
   1934		offset *= mult_adj;
   1935	}
   1936
   1937	/*
   1938	 * So the following can be confusing.
   1939	 *
    1940	 * To keep things simple, let's assume mult_adj == 1 for now.
   1941	 *
   1942	 * When mult_adj != 1, remember that the interval and offset values
   1943	 * have been appropriately scaled so the math is the same.
   1944	 *
   1945	 * The basic idea here is that we're increasing the multiplier
   1946	 * by one, this causes the xtime_interval to be incremented by
   1947	 * one cycle_interval. This is because:
   1948	 *	xtime_interval = cycle_interval * mult
   1949	 * So if mult is being incremented by one:
   1950	 *	xtime_interval = cycle_interval * (mult + 1)
    1951	 * It's the same as:
   1952	 *	xtime_interval = (cycle_interval * mult) + cycle_interval
   1953	 * Which can be shortened to:
   1954	 *	xtime_interval += cycle_interval
   1955	 *
   1956	 * So offset stores the non-accumulated cycles. Thus the current
   1957	 * time (in shifted nanoseconds) is:
   1958	 *	now = (offset * adj) + xtime_nsec
   1959	 * Now, even though we're adjusting the clock frequency, we have
   1960	 * to keep time consistent. In other words, we can't jump back
   1961	 * in time, and we also want to avoid jumping forward in time.
   1962	 *
   1963	 * So given the same offset value, we need the time to be the same
   1964	 * both before and after the freq adjustment.
   1965	 *	now = (offset * adj_1) + xtime_nsec_1
   1966	 *	now = (offset * adj_2) + xtime_nsec_2
   1967	 * So:
   1968	 *	(offset * adj_1) + xtime_nsec_1 =
   1969	 *		(offset * adj_2) + xtime_nsec_2
   1970	 * And we know:
   1971	 *	adj_2 = adj_1 + 1
   1972	 * So:
   1973	 *	(offset * adj_1) + xtime_nsec_1 =
   1974	 *		(offset * (adj_1+1)) + xtime_nsec_2
   1975	 *	(offset * adj_1) + xtime_nsec_1 =
   1976	 *		(offset * adj_1) + offset + xtime_nsec_2
   1977	 * Canceling the sides:
   1978	 *	xtime_nsec_1 = offset + xtime_nsec_2
   1979	 * Which gives us:
   1980	 *	xtime_nsec_2 = xtime_nsec_1 - offset
   1981	 * Which simplifies to:
   1982	 *	xtime_nsec -= offset
   1983	 */
   1984	if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) {
   1985		/* NTP adjustment caused clocksource mult overflow */
   1986		WARN_ON_ONCE(1);
   1987		return;
   1988	}
   1989
   1990	tk->tkr_mono.mult += mult_adj;
   1991	tk->xtime_interval += interval;
   1992	tk->tkr_mono.xtime_nsec -= offset;
   1993}
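
/*
 * Worked example (hypothetical numbers) for the derivation above: with
 * cycle_interval = 1000000 cycles and offset = 500000 not yet accumulated
 * cycles, bumping mult by one (mult_adj = 1) makes those 500000 cycles
 * worth 500000 more shifted nanoseconds than before. To keep
 * now = (offset * mult) + xtime_nsec unchanged across the adjustment,
 * xtime_nsec is reduced by offset = 500000 while xtime_interval grows by
 * one cycle_interval = 1000000.
 */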
   1994
   1995/*
   1996 * Adjust the timekeeper's multiplier to the correct frequency
   1997 * and also to reduce the accumulated error value.
   1998 */
   1999static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
   2000{
   2001	u32 mult;
   2002
   2003	/*
   2004	 * Determine the multiplier from the current NTP tick length.
   2005	 * Avoid expensive division when the tick length doesn't change.
   2006	 */
   2007	if (likely(tk->ntp_tick == ntp_tick_length())) {
   2008		mult = tk->tkr_mono.mult - tk->ntp_err_mult;
   2009	} else {
   2010		tk->ntp_tick = ntp_tick_length();
   2011		mult = div64_u64((tk->ntp_tick >> tk->ntp_error_shift) -
   2012				 tk->xtime_remainder, tk->cycle_interval);
   2013	}
   2014
   2015	/*
   2016	 * If the clock is behind the NTP time, increase the multiplier by 1
   2017	 * to catch up with it. If it's ahead and there was a remainder in the
   2018	 * tick division, the clock will slow down. Otherwise it will stay
   2019	 * ahead until the tick length changes to a non-divisible value.
   2020	 */
   2021	tk->ntp_err_mult = tk->ntp_error > 0 ? 1 : 0;
   2022	mult += tk->ntp_err_mult;
   2023
   2024	timekeeping_apply_adjustment(tk, offset, mult - tk->tkr_mono.mult);
   2025
   2026	if (unlikely(tk->tkr_mono.clock->maxadj &&
   2027		(abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult)
   2028			> tk->tkr_mono.clock->maxadj))) {
   2029		printk_once(KERN_WARNING
   2030			"Adjusting %s more than 11%% (%ld vs %ld)\n",
   2031			tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult,
   2032			(long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj);
   2033	}
   2034
   2035	/*
   2036	 * It may be possible that when we entered this function, xtime_nsec
   2037	 * was very small.  Further, if we're slightly speeding the clocksource
    2038	 * in the code above, it's possible the required corrective factor to
   2039	 * xtime_nsec could cause it to underflow.
   2040	 *
   2041	 * Now, since we have already accumulated the second and the NTP
   2042	 * subsystem has been notified via second_overflow(), we need to skip
   2043	 * the next update.
   2044	 */
   2045	if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) {
   2046		tk->tkr_mono.xtime_nsec += (u64)NSEC_PER_SEC <<
   2047							tk->tkr_mono.shift;
   2048		tk->xtime_sec--;
   2049		tk->skip_second_overflow = 1;
   2050	}
   2051}
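
/*
 * Worked example (hypothetical numbers) for the division above: with
 * ntp_tick_length() = 16000000 shifted nanoseconds per tick,
 * ntp_error_shift = 2, xtime_remainder = 0 and cycle_interval = 1000
 * cycles, the multiplier matching the NTP tick length is
 *
 *	mult = (16000000 >> 2) / 1000 = 4000
 *
 * If the clock is still behind NTP (ntp_error > 0), ntp_err_mult adds one
 * more so the clock runs slightly fast until the error is consumed.
 */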
   2052
   2053/*
   2054 * accumulate_nsecs_to_secs - Accumulates nsecs into secs
   2055 *
   2056 * Helper function that accumulates the nsecs greater than a second
    2057 * from the xtime_nsec field to the xtime_sec field.
   2058 * It also calls into the NTP code to handle leapsecond processing.
   2059 */
   2060static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
   2061{
   2062	u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
   2063	unsigned int clock_set = 0;
   2064
   2065	while (tk->tkr_mono.xtime_nsec >= nsecps) {
   2066		int leap;
   2067
   2068		tk->tkr_mono.xtime_nsec -= nsecps;
   2069		tk->xtime_sec++;
   2070
   2071		/*
   2072		 * Skip NTP update if this second was accumulated before,
   2073		 * i.e. xtime_nsec underflowed in timekeeping_adjust()
   2074		 */
   2075		if (unlikely(tk->skip_second_overflow)) {
   2076			tk->skip_second_overflow = 0;
   2077			continue;
   2078		}
   2079
    2080		/* Figure out if it's a leap second and apply it if needed */
   2081		leap = second_overflow(tk->xtime_sec);
   2082		if (unlikely(leap)) {
   2083			struct timespec64 ts;
   2084
   2085			tk->xtime_sec += leap;
   2086
   2087			ts.tv_sec = leap;
   2088			ts.tv_nsec = 0;
   2089			tk_set_wall_to_mono(tk,
   2090				timespec64_sub(tk->wall_to_monotonic, ts));
   2091
   2092			__timekeeping_set_tai_offset(tk, tk->tai_offset - leap);
   2093
   2094			clock_set = TK_CLOCK_WAS_SET;
   2095		}
   2096	}
   2097	return clock_set;
   2098}
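
/*
 * Worked example for the leap handling above: when a leap second is
 * inserted, second_overflow() returns leap = -1 at the end of the UTC
 * day. xtime_sec then steps back by one so the wall clock repeats the
 * last second, wall_to_monotonic grows by one second so CLOCK_MONOTONIC
 * is unaffected, and the TAI offset increases by one (tai_offset - leap)
 * because TAI-UTC grows across an inserted leap second.
 */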
   2099
   2100/*
   2101 * logarithmic_accumulation - shifted accumulation of cycles
   2102 *
    2103 * This function accumulates a shifted interval of cycles into
    2104 * a shifted interval of nanoseconds, which allows for an O(log)
    2105 * accumulation loop.
   2106 *
   2107 * Returns the unconsumed cycles.
   2108 */
   2109static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
   2110				    u32 shift, unsigned int *clock_set)
   2111{
   2112	u64 interval = tk->cycle_interval << shift;
   2113	u64 snsec_per_sec;
   2114
   2115	/* If the offset is smaller than a shifted interval, do nothing */
   2116	if (offset < interval)
   2117		return offset;
   2118
   2119	/* Accumulate one shifted interval */
   2120	offset -= interval;
   2121	tk->tkr_mono.cycle_last += interval;
   2122	tk->tkr_raw.cycle_last  += interval;
   2123
   2124	tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift;
   2125	*clock_set |= accumulate_nsecs_to_secs(tk);
   2126
   2127	/* Accumulate raw time */
   2128	tk->tkr_raw.xtime_nsec += tk->raw_interval << shift;
   2129	snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
   2130	while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) {
   2131		tk->tkr_raw.xtime_nsec -= snsec_per_sec;
   2132		tk->raw_sec++;
   2133	}
   2134
   2135	/* Accumulate error between NTP and clock interval */
   2136	tk->ntp_error += tk->ntp_tick << shift;
   2137	tk->ntp_error -= (tk->xtime_interval + tk->xtime_remainder) <<
   2138						(tk->ntp_error_shift + shift);
   2139
   2140	return offset;
   2141}
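
/*
 * Worked example (hypothetical numbers) for the function above: with
 * cycle_interval = 1000000 cycles and offset = 10000000 cycles pending, a
 * call with shift = 3 consumes one chunk of 8 * 1000000 cycles and
 * advances xtime_nsec by xtime_interval << 3 in a single step, leaving
 * offset = 2000000. The caller then lowers shift and consumes the
 * remainder in smaller chunks instead of looping ten times with shift = 0.
 */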
   2142
   2143/*
   2144 * timekeeping_advance - Updates the timekeeper to the current time and
   2145 * current NTP tick length
   2146 */
   2147static bool timekeeping_advance(enum timekeeping_adv_mode mode)
   2148{
   2149	struct timekeeper *real_tk = &tk_core.timekeeper;
   2150	struct timekeeper *tk = &shadow_timekeeper;
   2151	u64 offset;
   2152	int shift = 0, maxshift;
   2153	unsigned int clock_set = 0;
   2154	unsigned long flags;
   2155
   2156	raw_spin_lock_irqsave(&timekeeper_lock, flags);
   2157
   2158	/* Make sure we're fully resumed: */
   2159	if (unlikely(timekeeping_suspended))
   2160		goto out;
   2161
   2162	offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
   2163				   tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
   2164
   2165	/* Check if there's really nothing to do */
   2166	if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK)
   2167		goto out;
   2168
   2169	/* Do some additional sanity checking */
   2170	timekeeping_check_update(tk, offset);
   2171
   2172	/*
   2173	 * With NO_HZ we may have to accumulate many cycle_intervals
   2174	 * (think "ticks") worth of time at once. To do this efficiently,
   2175	 * we calculate the largest doubling multiple of cycle_intervals
   2176	 * that is smaller than the offset.  We then accumulate that
   2177	 * chunk in one go, and then try to consume the next smaller
   2178	 * doubled multiple.
   2179	 */
   2180	shift = ilog2(offset) - ilog2(tk->cycle_interval);
   2181	shift = max(0, shift);
   2182	/* Bound shift to one less than what overflows tick_length */
   2183	maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
   2184	shift = min(shift, maxshift);
   2185	while (offset >= tk->cycle_interval) {
   2186		offset = logarithmic_accumulation(tk, offset, shift,
   2187							&clock_set);
   2188		if (offset < tk->cycle_interval<<shift)
   2189			shift--;
   2190	}
   2191
   2192	/* Adjust the multiplier to correct NTP error */
   2193	timekeeping_adjust(tk, offset);
   2194
   2195	/*
   2196	 * Finally, make sure that after the rounding
   2197	 * xtime_nsec isn't larger than NSEC_PER_SEC
   2198	 */
   2199	clock_set |= accumulate_nsecs_to_secs(tk);
   2200
   2201	write_seqcount_begin(&tk_core.seq);
   2202	/*
   2203	 * Update the real timekeeper.
   2204	 *
   2205	 * We could avoid this memcpy by switching pointers, but that
   2206	 * requires changes to all other timekeeper usage sites as
   2207	 * well, i.e. move the timekeeper pointer getter into the
   2208	 * spinlocked/seqcount protected sections. And we trade this
   2209	 * memcpy under the tk_core.seq against one before we start
   2210	 * updating.
   2211	 */
   2212	timekeeping_update(tk, clock_set);
   2213	memcpy(real_tk, tk, sizeof(*tk));
   2214	/* The memcpy must come last. Do not put anything here! */
   2215	write_seqcount_end(&tk_core.seq);
   2216out:
   2217	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
   2218
   2219	return !!clock_set;
   2220}
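
/*
 * Worked example (hypothetical numbers) for the shift selection above: if
 * a long NO_HZ idle period left offset = 1000 * cycle_interval
 * unconsumed, ilog2(offset) - ilog2(cycle_interval) is 9 or 10 depending
 * on alignment, so the first logarithmic_accumulation() call swallows 512
 * or 1024 intervals at once. maxshift merely caps the shift so that
 * ntp_tick_length() << shift cannot overflow 64 bits.
 */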
   2221
   2222/**
   2223 * update_wall_time - Uses the current clocksource to increment the wall time
   2224 *
   2225 */
   2226void update_wall_time(void)
   2227{
   2228	if (timekeeping_advance(TK_ADV_TICK))
   2229		clock_was_set_delayed();
   2230}
   2231
   2232/**
   2233 * getboottime64 - Return the real time of system boot.
   2234 * @ts:		pointer to the timespec64 to be set
   2235 *
   2236 * Returns the wall-time of boot in a timespec64.
   2237 *
   2238 * This is based on the wall_to_monotonic offset and the total suspend
   2239 * time. Calls to settimeofday will affect the value returned (which
   2240 * basically means that however wrong your real time clock is at boot time,
   2241 * you get the right time here).
   2242 */
   2243void getboottime64(struct timespec64 *ts)
   2244{
   2245	struct timekeeper *tk = &tk_core.timekeeper;
   2246	ktime_t t = ktime_sub(tk->offs_real, tk->offs_boot);
   2247
   2248	*ts = ktime_to_timespec64(t);
   2249}
   2250EXPORT_SYMBOL_GPL(getboottime64);
   2251
   2252void ktime_get_coarse_real_ts64(struct timespec64 *ts)
   2253{
   2254	struct timekeeper *tk = &tk_core.timekeeper;
   2255	unsigned int seq;
   2256
   2257	do {
   2258		seq = read_seqcount_begin(&tk_core.seq);
   2259
   2260		*ts = tk_xtime(tk);
   2261	} while (read_seqcount_retry(&tk_core.seq, seq));
   2262}
   2263EXPORT_SYMBOL(ktime_get_coarse_real_ts64);
   2264
   2265void ktime_get_coarse_ts64(struct timespec64 *ts)
   2266{
   2267	struct timekeeper *tk = &tk_core.timekeeper;
   2268	struct timespec64 now, mono;
   2269	unsigned int seq;
   2270
   2271	do {
   2272		seq = read_seqcount_begin(&tk_core.seq);
   2273
   2274		now = tk_xtime(tk);
   2275		mono = tk->wall_to_monotonic;
   2276	} while (read_seqcount_retry(&tk_core.seq, seq));
   2277
   2278	set_normalized_timespec64(ts, now.tv_sec + mono.tv_sec,
   2279				now.tv_nsec + mono.tv_nsec);
   2280}
   2281EXPORT_SYMBOL(ktime_get_coarse_ts64);
   2282
   2283/*
   2284 * Must hold jiffies_lock
   2285 */
   2286void do_timer(unsigned long ticks)
   2287{
   2288	jiffies_64 += ticks;
   2289	calc_global_load();
   2290}
   2291
   2292/**
   2293 * ktime_get_update_offsets_now - hrtimer helper
   2294 * @cwsseq:	pointer to check and store the clock was set sequence number
   2295 * @offs_real:	pointer to storage for monotonic -> realtime offset
   2296 * @offs_boot:	pointer to storage for monotonic -> boottime offset
   2297 * @offs_tai:	pointer to storage for monotonic -> clock tai offset
   2298 *
   2299 * Returns current monotonic time and updates the offsets if the
   2300 * sequence number in @cwsseq and timekeeper.clock_was_set_seq are
   2301 * different.
   2302 *
   2303 * Called from hrtimer_interrupt() or retrigger_next_event()
   2304 */
   2305ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real,
   2306				     ktime_t *offs_boot, ktime_t *offs_tai)
   2307{
   2308	struct timekeeper *tk = &tk_core.timekeeper;
   2309	unsigned int seq;
   2310	ktime_t base;
   2311	u64 nsecs;
   2312
   2313	do {
   2314		seq = read_seqcount_begin(&tk_core.seq);
   2315
   2316		base = tk->tkr_mono.base;
   2317		nsecs = timekeeping_get_ns(&tk->tkr_mono);
   2318		base = ktime_add_ns(base, nsecs);
   2319
   2320		if (*cwsseq != tk->clock_was_set_seq) {
   2321			*cwsseq = tk->clock_was_set_seq;
   2322			*offs_real = tk->offs_real;
   2323			*offs_boot = tk->offs_boot;
   2324			*offs_tai = tk->offs_tai;
   2325		}
   2326
   2327		/* Handle leapsecond insertion adjustments */
   2328		if (unlikely(base >= tk->next_leap_ktime))
   2329			*offs_real = ktime_sub(tk->offs_real, ktime_set(1, 0));
   2330
   2331	} while (read_seqcount_retry(&tk_core.seq, seq));
   2332
   2333	return base;
   2334}
   2335
   2336/*
   2337 * timekeeping_validate_timex - Ensures the timex is ok for use in do_adjtimex
   2338 */
   2339static int timekeeping_validate_timex(const struct __kernel_timex *txc)
   2340{
   2341	if (txc->modes & ADJ_ADJTIME) {
   2342		/* singleshot must not be used with any other mode bits */
   2343		if (!(txc->modes & ADJ_OFFSET_SINGLESHOT))
   2344			return -EINVAL;
   2345		if (!(txc->modes & ADJ_OFFSET_READONLY) &&
   2346		    !capable(CAP_SYS_TIME))
   2347			return -EPERM;
   2348	} else {
   2349		/* In order to modify anything, you gotta be super-user! */
   2350		if (txc->modes && !capable(CAP_SYS_TIME))
   2351			return -EPERM;
   2352		/*
   2353		 * if the quartz is off by more than 10% then
   2354		 * something is VERY wrong!
   2355		 */
   2356		if (txc->modes & ADJ_TICK &&
   2357		    (txc->tick <  900000/USER_HZ ||
   2358		     txc->tick > 1100000/USER_HZ))
   2359			return -EINVAL;
   2360	}
   2361
   2362	if (txc->modes & ADJ_SETOFFSET) {
   2363		/* In order to inject time, you gotta be super-user! */
   2364		if (!capable(CAP_SYS_TIME))
   2365			return -EPERM;
   2366
   2367		/*
    2368		 * Validate that the timespec/timeval used to inject a time
    2369		 * offset is well formed.  Offsets can be positive or negative, so
    2370		 * we don't check tv_sec. The value of the timeval/timespec
    2371		 * is the sum of its fields, but *NOTE*:
   2372		 * The field tv_usec/tv_nsec must always be non-negative and
   2373		 * we can't have more nanoseconds/microseconds than a second.
   2374		 */
   2375		if (txc->time.tv_usec < 0)
   2376			return -EINVAL;
   2377
   2378		if (txc->modes & ADJ_NANO) {
   2379			if (txc->time.tv_usec >= NSEC_PER_SEC)
   2380				return -EINVAL;
   2381		} else {
   2382			if (txc->time.tv_usec >= USEC_PER_SEC)
   2383				return -EINVAL;
   2384		}
   2385	}
   2386
   2387	/*
   2388	 * Check for potential multiplication overflows that can
   2389	 * only happen on 64-bit systems:
   2390	 */
   2391	if ((txc->modes & ADJ_FREQUENCY) && (BITS_PER_LONG == 64)) {
   2392		if (LLONG_MIN / PPM_SCALE > txc->freq)
   2393			return -EINVAL;
   2394		if (LLONG_MAX / PPM_SCALE < txc->freq)
   2395			return -EINVAL;
   2396	}
   2397
   2398	return 0;
   2399}
   2400
   2401/**
   2402 * random_get_entropy_fallback - Returns the raw clock source value,
   2403 * used by random.c for platforms with no valid random_get_entropy().
   2404 */
   2405unsigned long random_get_entropy_fallback(void)
   2406{
   2407	struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono;
   2408	struct clocksource *clock = READ_ONCE(tkr->clock);
   2409
   2410	if (unlikely(timekeeping_suspended || !clock))
   2411		return 0;
   2412	return clock->read(clock);
   2413}
   2414EXPORT_SYMBOL_GPL(random_get_entropy_fallback);
   2415
   2416/**
   2417 * do_adjtimex() - Accessor function to NTP __do_adjtimex function
   2418 */
   2419int do_adjtimex(struct __kernel_timex *txc)
   2420{
   2421	struct timekeeper *tk = &tk_core.timekeeper;
   2422	struct audit_ntp_data ad;
   2423	bool clock_set = false;
   2424	struct timespec64 ts;
   2425	unsigned long flags;
   2426	s32 orig_tai, tai;
   2427	int ret;
   2428
   2429	/* Validate the data before disabling interrupts */
   2430	ret = timekeeping_validate_timex(txc);
   2431	if (ret)
   2432		return ret;
   2433
   2434	if (txc->modes & ADJ_SETOFFSET) {
   2435		struct timespec64 delta;
   2436		delta.tv_sec  = txc->time.tv_sec;
   2437		delta.tv_nsec = txc->time.tv_usec;
   2438		if (!(txc->modes & ADJ_NANO))
   2439			delta.tv_nsec *= 1000;
   2440		ret = timekeeping_inject_offset(&delta);
   2441		if (ret)
   2442			return ret;
   2443
   2444		audit_tk_injoffset(delta);
   2445	}
   2446
   2447	audit_ntp_init(&ad);
   2448
   2449	ktime_get_real_ts64(&ts);
   2450
   2451	raw_spin_lock_irqsave(&timekeeper_lock, flags);
   2452	write_seqcount_begin(&tk_core.seq);
   2453
   2454	orig_tai = tai = tk->tai_offset;
   2455	ret = __do_adjtimex(txc, &ts, &tai, &ad);
   2456
   2457	if (tai != orig_tai) {
   2458		__timekeeping_set_tai_offset(tk, tai);
   2459		timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
   2460		clock_set = true;
   2461	}
   2462	tk_update_leap_state(tk);
   2463
   2464	write_seqcount_end(&tk_core.seq);
   2465	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
   2466
   2467	audit_ntp_log(&ad);
   2468
   2469	/* Update the multiplier immediately if frequency was set directly */
   2470	if (txc->modes & (ADJ_FREQUENCY | ADJ_TICK))
   2471		clock_set |= timekeeping_advance(TK_ADV_FREQ);
   2472
   2473	if (clock_set)
   2474		clock_was_set(CLOCK_REALTIME);
   2475
   2476	ntp_notify_cmos_timer();
   2477
   2478	return ret;
   2479}
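
/*
 * Illustrative sketch (hypothetical caller, not part of the original
 * file): stepping the clock forward by 1.5s through do_adjtimex().
 * ADJ_NANO makes time.tv_usec be interpreted as nanoseconds, and the
 * values are chosen to pass timekeeping_validate_timex() above.
 */
static int example_step_clock(void)
{
	struct __kernel_timex txc = {
		.modes		= ADJ_SETOFFSET | ADJ_NANO,
		.time.tv_sec	= 1,
		.time.tv_usec	= 500000000,	/* nanoseconds due to ADJ_NANO */
	};

	return do_adjtimex(&txc);
}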
   2480
   2481#ifdef CONFIG_NTP_PPS
   2482/**
   2483 * hardpps() - Accessor function to NTP __hardpps function
   2484 */
   2485void hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts)
   2486{
   2487	unsigned long flags;
   2488
   2489	raw_spin_lock_irqsave(&timekeeper_lock, flags);
   2490	write_seqcount_begin(&tk_core.seq);
   2491
   2492	__hardpps(phase_ts, raw_ts);
   2493
   2494	write_seqcount_end(&tk_core.seq);
   2495	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
   2496}
   2497EXPORT_SYMBOL(hardpps);
   2498#endif /* CONFIG_NTP_PPS */