clk-apple-nco.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
clk-apple-nco.c (8812B)
      1// SPDX-License-Identifier: GPL-2.0-only OR MIT
      2/*
      3 * Driver for an SoC block (Numerically Controlled Oscillator)
      4 * found on t8103 (M1) and other Apple chips
      5 *
      6 * Copyright (C) The Asahi Linux Contributors
      7 */
      8
      9#include <linux/bits.h>
     10#include <linux/bitfield.h>
     11#include <linux/clk-provider.h>
     12#include <linux/io.h>
     13#include <linux/kernel.h>
     14#include <linux/math64.h>
     15#include <linux/module.h>
     16#include <linux/of.h>
     17#include <linux/platform_device.h>
     18#include <linux/spinlock.h>
     19
     20#define NCO_CHANNEL_STRIDE	0x4000
     21#define NCO_CHANNEL_REGSIZE	20
     22
     23#define REG_CTRL	0
     24#define CTRL_ENABLE	BIT(31)
     25#define REG_DIV		4
     26#define DIV_FINE	GENMASK(1, 0)
     27#define DIV_COARSE	GENMASK(12, 2)
     28#define REG_INC1	8
     29#define REG_INC2	12
     30#define REG_ACCINIT	16
     31
     32/*
     33 * Theory of operation (postulated)
     34 *
     35 * The REG_DIV register indirectly expresses a base integer divisor, roughly
     36 * corresponding to twice the desired ratio of input to output clock. This
     37 * base divisor is adjusted on a cycle-by-cycle basis based on the state of a
     38 * 32-bit phase accumulator to achieve a desired precise clock ratio over the
     39 * long term.
     40 *
     41 * Specifically an output clock cycle is produced after (REG_DIV divisor)/2
     42 * or (REG_DIV divisor + 1)/2 input cycles, the latter taking effect when top
     43 * bit of the 32-bit accumulator is set. The accumulator is incremented each
     44 * produced output cycle, by the value from either REG_INC1 or REG_INC2, which
     45 * of the two is selected depending again on the accumulator's current top bit.
     46 *
     47 * Because the NCO hardware implements counting of input clock cycles in part
     48 * in a Galois linear-feedback shift register, the higher bits of divisor
     49 * are programmed into REG_DIV by picking an appropriate LFSR state. See
     50 * applnco_compute_tables/applnco_div_translate for details on this.
     51 */
     52
     53#define LFSR_POLY	0xa01
     54#define LFSR_INIT	0x7ff
     55#define LFSR_LEN	11
     56#define LFSR_PERIOD	((1 << LFSR_LEN) - 1)
     57#define LFSR_TBLSIZE	(1 << LFSR_LEN)
     58
     59/* The minimal attainable coarse divisor (first value in table) */
     60#define COARSE_DIV_OFFSET 2
     61
     62struct applnco_tables {
     63	u16 fwd[LFSR_TBLSIZE];
     64	u16 inv[LFSR_TBLSIZE];
     65};
     66
     67struct applnco_channel {
     68	void __iomem *base;
     69	struct applnco_tables *tbl;
     70	struct clk_hw hw;
     71
     72	spinlock_t lock;
     73};
     74
     75#define to_applnco_channel(_hw) container_of(_hw, struct applnco_channel, hw)
     76
     77static void applnco_enable_nolock(struct clk_hw *hw)
     78{
     79	struct applnco_channel *chan = to_applnco_channel(hw);
     80	u32 val;
     81
     82	val = readl_relaxed(chan->base + REG_CTRL);
     83	writel_relaxed(val | CTRL_ENABLE, chan->base + REG_CTRL);
     84}
     85
     86static void applnco_disable_nolock(struct clk_hw *hw)
     87{
     88	struct applnco_channel *chan = to_applnco_channel(hw);
     89	u32 val;
     90
     91	val = readl_relaxed(chan->base + REG_CTRL);
     92	writel_relaxed(val & ~CTRL_ENABLE, chan->base + REG_CTRL);
     93}
     94
     95static int applnco_is_enabled(struct clk_hw *hw)
     96{
     97	struct applnco_channel *chan = to_applnco_channel(hw);
     98
     99	return (readl_relaxed(chan->base + REG_CTRL) & CTRL_ENABLE) != 0;
    100}
    101
    102static void applnco_compute_tables(struct applnco_tables *tbl)
    103{
    104	int i;
    105	u32 state = LFSR_INIT;
    106
    107	/*
    108	 * Go through the states of a Galois LFSR and build
    109	 * a coarse divisor translation table.
    110	 */
    111	for (i = LFSR_PERIOD; i > 0; i--) {
    112		if (state & 1)
    113			state = (state >> 1) ^ (LFSR_POLY >> 1);
    114		else
    115			state = (state >> 1);
    116		tbl->fwd[i] = state;
    117		tbl->inv[state] = i;
    118	}
    119
    120	/* Zero value is special-cased */
    121	tbl->fwd[0] = 0;
    122	tbl->inv[0] = 0;
    123}
    124
    125static bool applnco_div_out_of_range(unsigned int div)
    126{
    127	unsigned int coarse = div / 4;
    128
    129	return coarse < COARSE_DIV_OFFSET ||
    130		coarse >= COARSE_DIV_OFFSET + LFSR_TBLSIZE;
    131}
    132
    133static u32 applnco_div_translate(struct applnco_tables *tbl, unsigned int div)
    134{
    135	unsigned int coarse = div / 4;
    136
    137	if (WARN_ON(applnco_div_out_of_range(div)))
    138		return 0;
    139
    140	return FIELD_PREP(DIV_COARSE, tbl->fwd[coarse - COARSE_DIV_OFFSET]) |
    141			FIELD_PREP(DIV_FINE, div % 4);
    142}
    143
    144static unsigned int applnco_div_translate_inv(struct applnco_tables *tbl, u32 regval)
    145{
    146	unsigned int coarse, fine;
    147
    148	coarse = tbl->inv[FIELD_GET(DIV_COARSE, regval)] + COARSE_DIV_OFFSET;
    149	fine = FIELD_GET(DIV_FINE, regval);
    150
    151	return coarse * 4 + fine;
    152}
    153
    154static int applnco_set_rate(struct clk_hw *hw, unsigned long rate,
    155				unsigned long parent_rate)
    156{
    157	struct applnco_channel *chan = to_applnco_channel(hw);
    158	unsigned long flags;
    159	u32 div, inc1, inc2;
    160	bool was_enabled;
    161
    162	div = 2 * parent_rate / rate;
    163	inc1 = 2 * parent_rate - div * rate;
    164	inc2 = inc1 - rate;
    165
    166	if (applnco_div_out_of_range(div))
    167		return -EINVAL;
    168
    169	div = applnco_div_translate(chan->tbl, div);
    170
    171	spin_lock_irqsave(&chan->lock, flags);
    172	was_enabled = applnco_is_enabled(hw);
    173	applnco_disable_nolock(hw);
    174
    175	writel_relaxed(div,  chan->base + REG_DIV);
    176	writel_relaxed(inc1, chan->base + REG_INC1);
    177	writel_relaxed(inc2, chan->base + REG_INC2);
    178
    179	/* Presumably a neutral initial value for accumulator */
    180	writel_relaxed(1 << 31, chan->base + REG_ACCINIT);
    181
    182	if (was_enabled)
    183		applnco_enable_nolock(hw);
    184	spin_unlock_irqrestore(&chan->lock, flags);
    185
    186	return 0;
    187}
    188
    189static unsigned long applnco_recalc_rate(struct clk_hw *hw,
    190				unsigned long parent_rate)
    191{
    192	struct applnco_channel *chan = to_applnco_channel(hw);
    193	u32 div, inc1, inc2, incbase;
    194
    195	div = applnco_div_translate_inv(chan->tbl,
    196			readl_relaxed(chan->base + REG_DIV));
    197
    198	inc1 = readl_relaxed(chan->base + REG_INC1);
    199	inc2 = readl_relaxed(chan->base + REG_INC2);
    200
    201	/*
    202	 * We don't support wraparound of accumulator
    203	 * nor the edge case of both increments being zero
    204	 */
    205	if (inc1 >= (1 << 31) || inc2 < (1 << 31) || (inc1 == 0 && inc2 == 0))
    206		return 0;
    207
    208	/* Scale both sides of division by incbase to maintain precision */
    209	incbase = inc1 - inc2;
    210
    211	return div64_u64(((u64) parent_rate) * 2 * incbase,
    212			((u64) div) * incbase + inc1);
    213}
    214
    215static long applnco_round_rate(struct clk_hw *hw, unsigned long rate,
    216				unsigned long *parent_rate)
    217{
    218	unsigned long lo = *parent_rate / (COARSE_DIV_OFFSET + LFSR_TBLSIZE) + 1;
    219	unsigned long hi = *parent_rate / COARSE_DIV_OFFSET;
    220
    221	return clamp(rate, lo, hi);
    222}
    223
    224static int applnco_enable(struct clk_hw *hw)
    225{
    226	struct applnco_channel *chan = to_applnco_channel(hw);
    227	unsigned long flags;
    228
    229	spin_lock_irqsave(&chan->lock, flags);
    230	applnco_enable_nolock(hw);
    231	spin_unlock_irqrestore(&chan->lock, flags);
    232
    233	return 0;
    234}
    235
    236static void applnco_disable(struct clk_hw *hw)
    237{
    238	struct applnco_channel *chan = to_applnco_channel(hw);
    239	unsigned long flags;
    240
    241	spin_lock_irqsave(&chan->lock, flags);
    242	applnco_disable_nolock(hw);
    243	spin_unlock_irqrestore(&chan->lock, flags);
    244}
    245
    246static const struct clk_ops applnco_ops = {
    247	.set_rate = applnco_set_rate,
    248	.recalc_rate = applnco_recalc_rate,
    249	.round_rate = applnco_round_rate,
    250	.enable = applnco_enable,
    251	.disable = applnco_disable,
    252	.is_enabled = applnco_is_enabled,
    253};
    254
    255static int applnco_probe(struct platform_device *pdev)
    256{
    257	struct device_node *np = pdev->dev.of_node;
    258	struct clk_parent_data pdata = { .index = 0 };
    259	struct clk_init_data init;
    260	struct clk_hw_onecell_data *onecell_data;
    261	void __iomem *base;
    262	struct resource *res;
    263	struct applnco_tables *tbl;
    264	unsigned int nchannels;
    265	int ret, i;
    266
    267	base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
    268	if (IS_ERR(base))
    269		return PTR_ERR(base);
    270
    271	if (resource_size(res) < NCO_CHANNEL_REGSIZE)
    272		return -EINVAL;
    273	nchannels = (resource_size(res) - NCO_CHANNEL_REGSIZE)
    274			/ NCO_CHANNEL_STRIDE + 1;
    275
    276	onecell_data = devm_kzalloc(&pdev->dev, struct_size(onecell_data, hws,
    277							nchannels), GFP_KERNEL);
    278	if (!onecell_data)
    279		return -ENOMEM;
    280	onecell_data->num = nchannels;
    281
    282	tbl = devm_kzalloc(&pdev->dev, sizeof(*tbl), GFP_KERNEL);
    283	if (!tbl)
    284		return -ENOMEM;
    285	applnco_compute_tables(tbl);
    286
    287	for (i = 0; i < nchannels; i++) {
    288		struct applnco_channel *chan;
    289
    290		chan = devm_kzalloc(&pdev->dev, sizeof(*chan), GFP_KERNEL);
    291		if (!chan)
    292			return -ENOMEM;
    293		chan->base = base + NCO_CHANNEL_STRIDE * i;
    294		chan->tbl = tbl;
    295		spin_lock_init(&chan->lock);
    296
    297		memset(&init, 0, sizeof(init));
    298		init.name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
    299						"%s-%d", np->name, i);
    300		init.ops = &applnco_ops;
    301		init.parent_data = &pdata;
    302		init.num_parents = 1;
    303		init.flags = 0;
    304
    305		chan->hw.init = &init;
    306		ret = devm_clk_hw_register(&pdev->dev, &chan->hw);
    307		if (ret)
    308			return ret;
    309
    310		onecell_data->hws[i] = &chan->hw;
    311	}
    312
    313	return devm_of_clk_add_hw_provider(&pdev->dev, of_clk_hw_onecell_get,
    314							onecell_data);
    315}
    316
    317static const struct of_device_id applnco_ids[] = {
    318	{ .compatible = "apple,nco" },
    319	{ }
    320};
    321MODULE_DEVICE_TABLE(of, applnco_ids);
    322
    323static struct platform_driver applnco_driver = {
    324	.driver = {
    325		.name = "apple-nco",
    326		.of_match_table = applnco_ids,
    327	},
    328	.probe = applnco_probe,
    329};
    330module_platform_driver(applnco_driver);
    331
    332MODULE_AUTHOR("Martin Povišer <povik+lin@cutebit.org>");
    333MODULE_DESCRIPTION("Clock driver for NCO blocks on Apple SoCs");
    334MODULE_LICENSE("GPL");