cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

snapshot.c (74389B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * linux/kernel/power/snapshot.c
      4 *
      5 * This file provides system snapshot/restore functionality for swsusp.
      6 *
      7 * Copyright (C) 1998-2005 Pavel Machek <pavel@ucw.cz>
      8 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
      9 */
     10
     11#define pr_fmt(fmt) "PM: hibernation: " fmt
     12
     13#include <linux/version.h>
     14#include <linux/module.h>
     15#include <linux/mm.h>
     16#include <linux/suspend.h>
     17#include <linux/delay.h>
     18#include <linux/bitops.h>
     19#include <linux/spinlock.h>
     20#include <linux/kernel.h>
     21#include <linux/pm.h>
     22#include <linux/device.h>
     23#include <linux/init.h>
     24#include <linux/memblock.h>
     25#include <linux/nmi.h>
     26#include <linux/syscalls.h>
     27#include <linux/console.h>
     28#include <linux/highmem.h>
     29#include <linux/list.h>
     30#include <linux/slab.h>
     31#include <linux/compiler.h>
     32#include <linux/ktime.h>
     33#include <linux/set_memory.h>
     34
     35#include <linux/uaccess.h>
     36#include <asm/mmu_context.h>
     37#include <asm/tlbflush.h>
     38#include <asm/io.h>
     39
     40#include "power.h"
     41
     42#if defined(CONFIG_STRICT_KERNEL_RWX) && defined(CONFIG_ARCH_HAS_SET_MEMORY)
     43static bool hibernate_restore_protection;
     44static bool hibernate_restore_protection_active;
     45
     46void enable_restore_image_protection(void)
     47{
     48	hibernate_restore_protection = true;
     49}
     50
     51static inline void hibernate_restore_protection_begin(void)
     52{
     53	hibernate_restore_protection_active = hibernate_restore_protection;
     54}
     55
     56static inline void hibernate_restore_protection_end(void)
     57{
     58	hibernate_restore_protection_active = false;
     59}
     60
     61static inline void hibernate_restore_protect_page(void *page_address)
     62{
     63	if (hibernate_restore_protection_active)
     64		set_memory_ro((unsigned long)page_address, 1);
     65}
     66
     67static inline void hibernate_restore_unprotect_page(void *page_address)
     68{
     69	if (hibernate_restore_protection_active)
     70		set_memory_rw((unsigned long)page_address, 1);
     71}
     72#else
     73static inline void hibernate_restore_protection_begin(void) {}
     74static inline void hibernate_restore_protection_end(void) {}
     75static inline void hibernate_restore_protect_page(void *page_address) {}
     76static inline void hibernate_restore_unprotect_page(void *page_address) {}
     77#endif /* CONFIG_STRICT_KERNEL_RWX  && CONFIG_ARCH_HAS_SET_MEMORY */
     78
     79
     80/*
     81 * The calls to set_direct_map_*() should not fail because remapping a page
     82 * here means that we only update protection bits in an existing PTE.
     83 * It is still worth to have a warning here if something changes and this
     84 * will no longer be the case.
     85 */
     86static inline void hibernate_map_page(struct page *page)
     87{
     88	if (IS_ENABLED(CONFIG_ARCH_HAS_SET_DIRECT_MAP)) {
     89		int ret = set_direct_map_default_noflush(page);
     90
     91		if (ret)
     92			pr_warn_once("Failed to remap page\n");
     93	} else {
     94		debug_pagealloc_map_pages(page, 1);
     95	}
     96}
     97
     98static inline void hibernate_unmap_page(struct page *page)
     99{
    100	if (IS_ENABLED(CONFIG_ARCH_HAS_SET_DIRECT_MAP)) {
    101		unsigned long addr = (unsigned long)page_address(page);
    102		int ret  = set_direct_map_invalid_noflush(page);
    103
    104		if (ret)
    105			pr_warn_once("Failed to remap page\n");
    106
    107		flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
    108	} else {
    109		debug_pagealloc_unmap_pages(page, 1);
    110	}
    111}
    112
    113static int swsusp_page_is_free(struct page *);
    114static void swsusp_set_page_forbidden(struct page *);
    115static void swsusp_unset_page_forbidden(struct page *);
    116
    117/*
    118 * Number of bytes to reserve for memory allocations made by device drivers
    119 * from their ->freeze() and ->freeze_noirq() callbacks so that they don't
    120 * cause image creation to fail (tunable via /sys/power/reserved_size).
    121 */
    122unsigned long reserved_size;
    123
    124void __init hibernate_reserved_size_init(void)
    125{
    126	reserved_size = SPARE_PAGES * PAGE_SIZE;
    127}
    128
    129/*
    130 * Preferred image size in bytes (tunable via /sys/power/image_size).
    131 * When it is set to N, swsusp will do its best to ensure the image
    132 * size will not exceed N bytes, but if that is impossible, it will
    133 * try to create the smallest image possible.
    134 */
    135unsigned long image_size;
    136
    137void __init hibernate_image_size_init(void)
    138{
    139	image_size = ((totalram_pages() * 2) / 5) * PAGE_SIZE;
    140}
    141
    142/*
    143 * List of PBEs needed for restoring the pages that were allocated before
    144 * the suspend and included in the suspend image, but have also been
    145 * allocated by the "resume" kernel, so their contents cannot be written
    146 * directly to their "original" page frames.
    147 */
    148struct pbe *restore_pblist;
    149
    150/* struct linked_page is used to build chains of pages */
    151
    152#define LINKED_PAGE_DATA_SIZE	(PAGE_SIZE - sizeof(void *))
    153
    154struct linked_page {
    155	struct linked_page *next;
    156	char data[LINKED_PAGE_DATA_SIZE];
    157} __packed;
    158
    159/*
    160 * List of "safe" pages (ie. pages that were not used by the image kernel
    161 * before hibernation) that may be used as temporary storage for image kernel
    162 * memory contents.
    163 */
    164static struct linked_page *safe_pages_list;
    165
    166/* Pointer to an auxiliary buffer (1 page) */
    167static void *buffer;
    168
    169#define PG_ANY		0
    170#define PG_SAFE		1
    171#define PG_UNSAFE_CLEAR	1
    172#define PG_UNSAFE_KEEP	0
    173
    174static unsigned int allocated_unsafe_pages;
    175
    176/**
    177 * get_image_page - Allocate a page for a hibernation image.
    178 * @gfp_mask: GFP mask for the allocation.
    179 * @safe_needed: Get pages that were not used before hibernation (restore only)
    180 *
    181 * During image restoration, for storing the PBE list and the image data, we can
    182 * only use memory pages that do not conflict with the pages used before
    183 * hibernation.  The "unsafe" pages have PageNosaveFree set and we count them
    184 * using allocated_unsafe_pages.
    185 *
    186 * Each allocated image page is marked as PageNosave and PageNosaveFree so that
    187 * swsusp_free() can release it.
    188 */
    189static void *get_image_page(gfp_t gfp_mask, int safe_needed)
    190{
    191	void *res;
    192
    193	res = (void *)get_zeroed_page(gfp_mask);
    194	if (safe_needed)
    195		while (res && swsusp_page_is_free(virt_to_page(res))) {
    196			/* The page is unsafe, mark it for swsusp_free() */
    197			swsusp_set_page_forbidden(virt_to_page(res));
    198			allocated_unsafe_pages++;
    199			res = (void *)get_zeroed_page(gfp_mask);
    200		}
    201	if (res) {
    202		swsusp_set_page_forbidden(virt_to_page(res));
    203		swsusp_set_page_free(virt_to_page(res));
    204	}
    205	return res;
    206}
    207
    208static void *__get_safe_page(gfp_t gfp_mask)
    209{
    210	if (safe_pages_list) {
    211		void *ret = safe_pages_list;
    212
    213		safe_pages_list = safe_pages_list->next;
    214		memset(ret, 0, PAGE_SIZE);
    215		return ret;
    216	}
    217	return get_image_page(gfp_mask, PG_SAFE);
    218}
    219
    220unsigned long get_safe_page(gfp_t gfp_mask)
    221{
    222	return (unsigned long)__get_safe_page(gfp_mask);
    223}
    224
    225static struct page *alloc_image_page(gfp_t gfp_mask)
    226{
    227	struct page *page;
    228
    229	page = alloc_page(gfp_mask);
    230	if (page) {
    231		swsusp_set_page_forbidden(page);
    232		swsusp_set_page_free(page);
    233	}
    234	return page;
    235}
    236
    237static void recycle_safe_page(void *page_address)
    238{
    239	struct linked_page *lp = page_address;
    240
    241	lp->next = safe_pages_list;
    242	safe_pages_list = lp;
    243}
    244
    245/**
    246 * free_image_page - Free a page allocated for hibernation image.
    247 * @addr: Address of the page to free.
    248 * @clear_nosave_free: If set, clear the PageNosaveFree bit for the page.
    249 *
    250 * The page to free should have been allocated by get_image_page() (page flags
    251 * set by it are affected).
    252 */
    253static inline void free_image_page(void *addr, int clear_nosave_free)
    254{
    255	struct page *page;
    256
    257	BUG_ON(!virt_addr_valid(addr));
    258
    259	page = virt_to_page(addr);
    260
    261	swsusp_unset_page_forbidden(page);
    262	if (clear_nosave_free)
    263		swsusp_unset_page_free(page);
    264
    265	__free_page(page);
    266}
    267
    268static inline void free_list_of_pages(struct linked_page *list,
    269				      int clear_page_nosave)
    270{
    271	while (list) {
    272		struct linked_page *lp = list->next;
    273
    274		free_image_page(list, clear_page_nosave);
    275		list = lp;
    276	}
    277}
    278
    279/*
    280 * struct chain_allocator is used for allocating small objects out of
    281 * a linked list of pages called 'the chain'.
    282 *
    283 * The chain grows each time when there is no room for a new object in
    284 * the current page.  The allocated objects cannot be freed individually.
    285 * It is only possible to free them all at once, by freeing the entire
    286 * chain.
    287 *
    288 * NOTE: The chain allocator may be inefficient if the allocated objects
    289 * are not much smaller than PAGE_SIZE.
    290 */
    291struct chain_allocator {
    292	struct linked_page *chain;	/* the chain */
    293	unsigned int used_space;	/* total size of objects allocated out
    294					   of the current page */
    295	gfp_t gfp_mask;		/* mask for allocating pages */
    296	int safe_needed;	/* if set, only "safe" pages are allocated */
    297};
    298
    299static void chain_init(struct chain_allocator *ca, gfp_t gfp_mask,
    300		       int safe_needed)
    301{
    302	ca->chain = NULL;
    303	ca->used_space = LINKED_PAGE_DATA_SIZE;
    304	ca->gfp_mask = gfp_mask;
    305	ca->safe_needed = safe_needed;
    306}
    307
    308static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
    309{
    310	void *ret;
    311
    312	if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) {
    313		struct linked_page *lp;
    314
    315		lp = ca->safe_needed ? __get_safe_page(ca->gfp_mask) :
    316					get_image_page(ca->gfp_mask, PG_ANY);
    317		if (!lp)
    318			return NULL;
    319
    320		lp->next = ca->chain;
    321		ca->chain = lp;
    322		ca->used_space = 0;
    323	}
    324	ret = ca->chain->data + ca->used_space;
    325	ca->used_space += size;
    326	return ret;
    327}
    328
    329/*
    330 * Data types related to memory bitmaps.
    331 *
    332 * Memory bitmap is a structure consisting of many linked lists of
    333 * objects.  The main list's elements are of type struct zone_bitmap
    334 * and each of them corresponds to one zone.  For each zone bitmap
    335 * object there is a list of objects of type struct bm_block that
    336 * represent each blocks of bitmap in which information is stored.
    337 *
    338 * struct memory_bitmap contains a pointer to the main list of zone
    339 * bitmap objects, a struct bm_position used for browsing the bitmap,
    340 * and a pointer to the list of pages used for allocating all of the
    341 * zone bitmap objects and bitmap block objects.
    342 *
    343 * NOTE: It has to be possible to lay out the bitmap in memory
    344 * using only allocations of order 0.  Additionally, the bitmap is
    345 * designed to work with arbitrary number of zones (this is over the
    346 * top for now, but let's avoid making unnecessary assumptions ;-).
    347 *
    348 * struct zone_bitmap contains a pointer to a list of bitmap block
    349 * objects and a pointer to the bitmap block object that has been
    350 * most recently used for setting bits.  Additionally, it contains the
    351 * PFNs that correspond to the start and end of the represented zone.
    352 *
    353 * struct bm_block contains a pointer to the memory page in which
    354 * information is stored (in the form of a block of bitmap)
    355 * It also contains the pfns that correspond to the start and end of
    356 * the represented memory area.
    357 *
    358 * The memory bitmap is organized as a radix tree to guarantee fast random
    359 * access to the bits. There is one radix tree for each zone (as returned
    360 * from create_mem_extents).
    361 *
    362 * One radix tree is represented by one struct mem_zone_bm_rtree. There are
    363 * two linked lists for the nodes of the tree, one for the inner nodes and
    364 * one for the leave nodes. The linked leave nodes are used for fast linear
    365 * access of the memory bitmap.
    366 *
    367 * The struct rtree_node represents one node of the radix tree.
    368 */
    369
    370#define BM_END_OF_MAP	(~0UL)
    371
    372#define BM_BITS_PER_BLOCK	(PAGE_SIZE * BITS_PER_BYTE)
    373#define BM_BLOCK_SHIFT		(PAGE_SHIFT + 3)
    374#define BM_BLOCK_MASK		((1UL << BM_BLOCK_SHIFT) - 1)
    375
    376/*
    377 * struct rtree_node is a wrapper struct to link the nodes
    378 * of the rtree together for easy linear iteration over
    379 * bits and easy freeing
    380 */
    381struct rtree_node {
    382	struct list_head list;
    383	unsigned long *data;
    384};
    385
    386/*
    387 * struct mem_zone_bm_rtree represents a bitmap used for one
    388 * populated memory zone.
    389 */
    390struct mem_zone_bm_rtree {
    391	struct list_head list;		/* Link Zones together         */
    392	struct list_head nodes;		/* Radix Tree inner nodes      */
    393	struct list_head leaves;	/* Radix Tree leaves           */
    394	unsigned long start_pfn;	/* Zone start page frame       */
    395	unsigned long end_pfn;		/* Zone end page frame + 1     */
    396	struct rtree_node *rtree;	/* Radix Tree Root             */
    397	int levels;			/* Number of Radix Tree Levels */
    398	unsigned int blocks;		/* Number of Bitmap Blocks     */
    399};
    400
    401/* strcut bm_position is used for browsing memory bitmaps */
    402
    403struct bm_position {
    404	struct mem_zone_bm_rtree *zone;
    405	struct rtree_node *node;
    406	unsigned long node_pfn;
    407	int node_bit;
    408};
    409
    410struct memory_bitmap {
    411	struct list_head zones;
    412	struct linked_page *p_list;	/* list of pages used to store zone
    413					   bitmap objects and bitmap block
    414					   objects */
    415	struct bm_position cur;	/* most recently used bit position */
    416};
    417
    418/* Functions that operate on memory bitmaps */
    419
    420#define BM_ENTRIES_PER_LEVEL	(PAGE_SIZE / sizeof(unsigned long))
    421#if BITS_PER_LONG == 32
    422#define BM_RTREE_LEVEL_SHIFT	(PAGE_SHIFT - 2)
    423#else
    424#define BM_RTREE_LEVEL_SHIFT	(PAGE_SHIFT - 3)
    425#endif
    426#define BM_RTREE_LEVEL_MASK	((1UL << BM_RTREE_LEVEL_SHIFT) - 1)
    427
    428/**
    429 * alloc_rtree_node - Allocate a new node and add it to the radix tree.
    430 * @gfp_mask: GFP mask for the allocation.
    431 * @safe_needed: Get pages not used before hibernation (restore only)
    432 * @ca: Pointer to a linked list of pages ("a chain") to allocate from
    433 * @list: Radix Tree node to add.
    434 *
    435 * This function is used to allocate inner nodes as well as the
    436 * leave nodes of the radix tree. It also adds the node to the
    437 * corresponding linked list passed in by the *list parameter.
    438 */
    439static struct rtree_node *alloc_rtree_node(gfp_t gfp_mask, int safe_needed,
    440					   struct chain_allocator *ca,
    441					   struct list_head *list)
    442{
    443	struct rtree_node *node;
    444
    445	node = chain_alloc(ca, sizeof(struct rtree_node));
    446	if (!node)
    447		return NULL;
    448
    449	node->data = get_image_page(gfp_mask, safe_needed);
    450	if (!node->data)
    451		return NULL;
    452
    453	list_add_tail(&node->list, list);
    454
    455	return node;
    456}
    457
    458/**
    459 * add_rtree_block - Add a new leave node to the radix tree.
    460 *
    461 * The leave nodes need to be allocated in order to keep the leaves
    462 * linked list in order. This is guaranteed by the zone->blocks
    463 * counter.
    464 */
    465static int add_rtree_block(struct mem_zone_bm_rtree *zone, gfp_t gfp_mask,
    466			   int safe_needed, struct chain_allocator *ca)
    467{
    468	struct rtree_node *node, *block, **dst;
    469	unsigned int levels_needed, block_nr;
    470	int i;
    471
    472	block_nr = zone->blocks;
    473	levels_needed = 0;
    474
    475	/* How many levels do we need for this block nr? */
    476	while (block_nr) {
    477		levels_needed += 1;
    478		block_nr >>= BM_RTREE_LEVEL_SHIFT;
    479	}
    480
    481	/* Make sure the rtree has enough levels */
    482	for (i = zone->levels; i < levels_needed; i++) {
    483		node = alloc_rtree_node(gfp_mask, safe_needed, ca,
    484					&zone->nodes);
    485		if (!node)
    486			return -ENOMEM;
    487
    488		node->data[0] = (unsigned long)zone->rtree;
    489		zone->rtree = node;
    490		zone->levels += 1;
    491	}
    492
    493	/* Allocate new block */
    494	block = alloc_rtree_node(gfp_mask, safe_needed, ca, &zone->leaves);
    495	if (!block)
    496		return -ENOMEM;
    497
    498	/* Now walk the rtree to insert the block */
    499	node = zone->rtree;
    500	dst = &zone->rtree;
    501	block_nr = zone->blocks;
    502	for (i = zone->levels; i > 0; i--) {
    503		int index;
    504
    505		if (!node) {
    506			node = alloc_rtree_node(gfp_mask, safe_needed, ca,
    507						&zone->nodes);
    508			if (!node)
    509				return -ENOMEM;
    510			*dst = node;
    511		}
    512
    513		index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT);
    514		index &= BM_RTREE_LEVEL_MASK;
    515		dst = (struct rtree_node **)&((*dst)->data[index]);
    516		node = *dst;
    517	}
    518
    519	zone->blocks += 1;
    520	*dst = block;
    521
    522	return 0;
    523}
    524
    525static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone,
    526			       int clear_nosave_free);
    527
    528/**
    529 * create_zone_bm_rtree - Create a radix tree for one zone.
    530 *
    531 * Allocated the mem_zone_bm_rtree structure and initializes it.
    532 * This function also allocated and builds the radix tree for the
    533 * zone.
    534 */
    535static struct mem_zone_bm_rtree *create_zone_bm_rtree(gfp_t gfp_mask,
    536						      int safe_needed,
    537						      struct chain_allocator *ca,
    538						      unsigned long start,
    539						      unsigned long end)
    540{
    541	struct mem_zone_bm_rtree *zone;
    542	unsigned int i, nr_blocks;
    543	unsigned long pages;
    544
    545	pages = end - start;
    546	zone  = chain_alloc(ca, sizeof(struct mem_zone_bm_rtree));
    547	if (!zone)
    548		return NULL;
    549
    550	INIT_LIST_HEAD(&zone->nodes);
    551	INIT_LIST_HEAD(&zone->leaves);
    552	zone->start_pfn = start;
    553	zone->end_pfn = end;
    554	nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK);
    555
    556	for (i = 0; i < nr_blocks; i++) {
    557		if (add_rtree_block(zone, gfp_mask, safe_needed, ca)) {
    558			free_zone_bm_rtree(zone, PG_UNSAFE_CLEAR);
    559			return NULL;
    560		}
    561	}
    562
    563	return zone;
    564}
    565
    566/**
    567 * free_zone_bm_rtree - Free the memory of the radix tree.
    568 *
    569 * Free all node pages of the radix tree. The mem_zone_bm_rtree
    570 * structure itself is not freed here nor are the rtree_node
    571 * structs.
    572 */
    573static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone,
    574			       int clear_nosave_free)
    575{
    576	struct rtree_node *node;
    577
    578	list_for_each_entry(node, &zone->nodes, list)
    579		free_image_page(node->data, clear_nosave_free);
    580
    581	list_for_each_entry(node, &zone->leaves, list)
    582		free_image_page(node->data, clear_nosave_free);
    583}
    584
    585static void memory_bm_position_reset(struct memory_bitmap *bm)
    586{
    587	bm->cur.zone = list_entry(bm->zones.next, struct mem_zone_bm_rtree,
    588				  list);
    589	bm->cur.node = list_entry(bm->cur.zone->leaves.next,
    590				  struct rtree_node, list);
    591	bm->cur.node_pfn = 0;
    592	bm->cur.node_bit = 0;
    593}
    594
    595static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
    596
    597struct mem_extent {
    598	struct list_head hook;
    599	unsigned long start;
    600	unsigned long end;
    601};
    602
    603/**
    604 * free_mem_extents - Free a list of memory extents.
    605 * @list: List of extents to free.
    606 */
    607static void free_mem_extents(struct list_head *list)
    608{
    609	struct mem_extent *ext, *aux;
    610
    611	list_for_each_entry_safe(ext, aux, list, hook) {
    612		list_del(&ext->hook);
    613		kfree(ext);
    614	}
    615}
    616
    617/**
    618 * create_mem_extents - Create a list of memory extents.
    619 * @list: List to put the extents into.
    620 * @gfp_mask: Mask to use for memory allocations.
    621 *
    622 * The extents represent contiguous ranges of PFNs.
    623 */
    624static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
    625{
    626	struct zone *zone;
    627
    628	INIT_LIST_HEAD(list);
    629
    630	for_each_populated_zone(zone) {
    631		unsigned long zone_start, zone_end;
    632		struct mem_extent *ext, *cur, *aux;
    633
    634		zone_start = zone->zone_start_pfn;
    635		zone_end = zone_end_pfn(zone);
    636
    637		list_for_each_entry(ext, list, hook)
    638			if (zone_start <= ext->end)
    639				break;
    640
    641		if (&ext->hook == list || zone_end < ext->start) {
    642			/* New extent is necessary */
    643			struct mem_extent *new_ext;
    644
    645			new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask);
    646			if (!new_ext) {
    647				free_mem_extents(list);
    648				return -ENOMEM;
    649			}
    650			new_ext->start = zone_start;
    651			new_ext->end = zone_end;
    652			list_add_tail(&new_ext->hook, &ext->hook);
    653			continue;
    654		}
    655
    656		/* Merge this zone's range of PFNs with the existing one */
    657		if (zone_start < ext->start)
    658			ext->start = zone_start;
    659		if (zone_end > ext->end)
    660			ext->end = zone_end;
    661
    662		/* More merging may be possible */
    663		cur = ext;
    664		list_for_each_entry_safe_continue(cur, aux, list, hook) {
    665			if (zone_end < cur->start)
    666				break;
    667			if (zone_end < cur->end)
    668				ext->end = cur->end;
    669			list_del(&cur->hook);
    670			kfree(cur);
    671		}
    672	}
    673
    674	return 0;
    675}
    676
    677/**
    678 * memory_bm_create - Allocate memory for a memory bitmap.
    679 */
    680static int memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask,
    681			    int safe_needed)
    682{
    683	struct chain_allocator ca;
    684	struct list_head mem_extents;
    685	struct mem_extent *ext;
    686	int error;
    687
    688	chain_init(&ca, gfp_mask, safe_needed);
    689	INIT_LIST_HEAD(&bm->zones);
    690
    691	error = create_mem_extents(&mem_extents, gfp_mask);
    692	if (error)
    693		return error;
    694
    695	list_for_each_entry(ext, &mem_extents, hook) {
    696		struct mem_zone_bm_rtree *zone;
    697
    698		zone = create_zone_bm_rtree(gfp_mask, safe_needed, &ca,
    699					    ext->start, ext->end);
    700		if (!zone) {
    701			error = -ENOMEM;
    702			goto Error;
    703		}
    704		list_add_tail(&zone->list, &bm->zones);
    705	}
    706
    707	bm->p_list = ca.chain;
    708	memory_bm_position_reset(bm);
    709 Exit:
    710	free_mem_extents(&mem_extents);
    711	return error;
    712
    713 Error:
    714	bm->p_list = ca.chain;
    715	memory_bm_free(bm, PG_UNSAFE_CLEAR);
    716	goto Exit;
    717}
    718
    719/**
    720 * memory_bm_free - Free memory occupied by the memory bitmap.
    721 * @bm: Memory bitmap.
    722 */
    723static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
    724{
    725	struct mem_zone_bm_rtree *zone;
    726
    727	list_for_each_entry(zone, &bm->zones, list)
    728		free_zone_bm_rtree(zone, clear_nosave_free);
    729
    730	free_list_of_pages(bm->p_list, clear_nosave_free);
    731
    732	INIT_LIST_HEAD(&bm->zones);
    733}
    734
    735/**
    736 * memory_bm_find_bit - Find the bit for a given PFN in a memory bitmap.
    737 *
    738 * Find the bit in memory bitmap @bm that corresponds to the given PFN.
    739 * The cur.zone, cur.block and cur.node_pfn members of @bm are updated.
    740 *
    741 * Walk the radix tree to find the page containing the bit that represents @pfn
    742 * and return the position of the bit in @addr and @bit_nr.
    743 */
    744static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
    745			      void **addr, unsigned int *bit_nr)
    746{
    747	struct mem_zone_bm_rtree *curr, *zone;
    748	struct rtree_node *node;
    749	int i, block_nr;
    750
    751	zone = bm->cur.zone;
    752
    753	if (pfn >= zone->start_pfn && pfn < zone->end_pfn)
    754		goto zone_found;
    755
    756	zone = NULL;
    757
    758	/* Find the right zone */
    759	list_for_each_entry(curr, &bm->zones, list) {
    760		if (pfn >= curr->start_pfn && pfn < curr->end_pfn) {
    761			zone = curr;
    762			break;
    763		}
    764	}
    765
    766	if (!zone)
    767		return -EFAULT;
    768
    769zone_found:
    770	/*
    771	 * We have found the zone. Now walk the radix tree to find the leaf node
    772	 * for our PFN.
    773	 */
    774
    775	/*
    776	 * If the zone we wish to scan is the current zone and the
    777	 * pfn falls into the current node then we do not need to walk
    778	 * the tree.
    779	 */
    780	node = bm->cur.node;
    781	if (zone == bm->cur.zone &&
    782	    ((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn)
    783		goto node_found;
    784
    785	node      = zone->rtree;
    786	block_nr  = (pfn - zone->start_pfn) >> BM_BLOCK_SHIFT;
    787
    788	for (i = zone->levels; i > 0; i--) {
    789		int index;
    790
    791		index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT);
    792		index &= BM_RTREE_LEVEL_MASK;
    793		BUG_ON(node->data[index] == 0);
    794		node = (struct rtree_node *)node->data[index];
    795	}
    796
    797node_found:
    798	/* Update last position */
    799	bm->cur.zone = zone;
    800	bm->cur.node = node;
    801	bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK;
    802
    803	/* Set return values */
    804	*addr = node->data;
    805	*bit_nr = (pfn - zone->start_pfn) & BM_BLOCK_MASK;
    806
    807	return 0;
    808}
    809
    810static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
    811{
    812	void *addr;
    813	unsigned int bit;
    814	int error;
    815
    816	error = memory_bm_find_bit(bm, pfn, &addr, &bit);
    817	BUG_ON(error);
    818	set_bit(bit, addr);
    819}
    820
    821static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn)
    822{
    823	void *addr;
    824	unsigned int bit;
    825	int error;
    826
    827	error = memory_bm_find_bit(bm, pfn, &addr, &bit);
    828	if (!error)
    829		set_bit(bit, addr);
    830
    831	return error;
    832}
    833
    834static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
    835{
    836	void *addr;
    837	unsigned int bit;
    838	int error;
    839
    840	error = memory_bm_find_bit(bm, pfn, &addr, &bit);
    841	BUG_ON(error);
    842	clear_bit(bit, addr);
    843}
    844
    845static void memory_bm_clear_current(struct memory_bitmap *bm)
    846{
    847	int bit;
    848
    849	bit = max(bm->cur.node_bit - 1, 0);
    850	clear_bit(bit, bm->cur.node->data);
    851}
    852
    853static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
    854{
    855	void *addr;
    856	unsigned int bit;
    857	int error;
    858
    859	error = memory_bm_find_bit(bm, pfn, &addr, &bit);
    860	BUG_ON(error);
    861	return test_bit(bit, addr);
    862}
    863
    864static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
    865{
    866	void *addr;
    867	unsigned int bit;
    868
    869	return !memory_bm_find_bit(bm, pfn, &addr, &bit);
    870}
    871
    872/*
    873 * rtree_next_node - Jump to the next leaf node.
    874 *
    875 * Set the position to the beginning of the next node in the
    876 * memory bitmap. This is either the next node in the current
    877 * zone's radix tree or the first node in the radix tree of the
    878 * next zone.
    879 *
    880 * Return true if there is a next node, false otherwise.
    881 */
    882static bool rtree_next_node(struct memory_bitmap *bm)
    883{
    884	if (!list_is_last(&bm->cur.node->list, &bm->cur.zone->leaves)) {
    885		bm->cur.node = list_entry(bm->cur.node->list.next,
    886					  struct rtree_node, list);
    887		bm->cur.node_pfn += BM_BITS_PER_BLOCK;
    888		bm->cur.node_bit  = 0;
    889		touch_softlockup_watchdog();
    890		return true;
    891	}
    892
    893	/* No more nodes, goto next zone */
    894	if (!list_is_last(&bm->cur.zone->list, &bm->zones)) {
    895		bm->cur.zone = list_entry(bm->cur.zone->list.next,
    896				  struct mem_zone_bm_rtree, list);
    897		bm->cur.node = list_entry(bm->cur.zone->leaves.next,
    898					  struct rtree_node, list);
    899		bm->cur.node_pfn = 0;
    900		bm->cur.node_bit = 0;
    901		return true;
    902	}
    903
    904	/* No more zones */
    905	return false;
    906}
    907
    908/**
    909 * memory_bm_next_pfn - Find the next set bit in a memory bitmap.
    910 * @bm: Memory bitmap.
    911 *
    912 * Starting from the last returned position this function searches for the next
    913 * set bit in @bm and returns the PFN represented by it.  If no more bits are
    914 * set, BM_END_OF_MAP is returned.
    915 *
    916 * It is required to run memory_bm_position_reset() before the first call to
    917 * this function for the given memory bitmap.
    918 */
    919static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
    920{
    921	unsigned long bits, pfn, pages;
    922	int bit;
    923
    924	do {
    925		pages	  = bm->cur.zone->end_pfn - bm->cur.zone->start_pfn;
    926		bits      = min(pages - bm->cur.node_pfn, BM_BITS_PER_BLOCK);
    927		bit	  = find_next_bit(bm->cur.node->data, bits,
    928					  bm->cur.node_bit);
    929		if (bit < bits) {
    930			pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit;
    931			bm->cur.node_bit = bit + 1;
    932			return pfn;
    933		}
    934	} while (rtree_next_node(bm));
    935
    936	return BM_END_OF_MAP;
    937}
    938
    939/*
    940 * This structure represents a range of page frames the contents of which
    941 * should not be saved during hibernation.
    942 */
    943struct nosave_region {
    944	struct list_head list;
    945	unsigned long start_pfn;
    946	unsigned long end_pfn;
    947};
    948
    949static LIST_HEAD(nosave_regions);
    950
    951static void recycle_zone_bm_rtree(struct mem_zone_bm_rtree *zone)
    952{
    953	struct rtree_node *node;
    954
    955	list_for_each_entry(node, &zone->nodes, list)
    956		recycle_safe_page(node->data);
    957
    958	list_for_each_entry(node, &zone->leaves, list)
    959		recycle_safe_page(node->data);
    960}
    961
    962static void memory_bm_recycle(struct memory_bitmap *bm)
    963{
    964	struct mem_zone_bm_rtree *zone;
    965	struct linked_page *p_list;
    966
    967	list_for_each_entry(zone, &bm->zones, list)
    968		recycle_zone_bm_rtree(zone);
    969
    970	p_list = bm->p_list;
    971	while (p_list) {
    972		struct linked_page *lp = p_list;
    973
    974		p_list = lp->next;
    975		recycle_safe_page(lp);
    976	}
    977}
    978
    979/**
    980 * register_nosave_region - Register a region of unsaveable memory.
    981 *
    982 * Register a range of page frames the contents of which should not be saved
    983 * during hibernation (to be used in the early initialization code).
    984 */
    985void __init register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
    986{
    987	struct nosave_region *region;
    988
    989	if (start_pfn >= end_pfn)
    990		return;
    991
    992	if (!list_empty(&nosave_regions)) {
    993		/* Try to extend the previous region (they should be sorted) */
    994		region = list_entry(nosave_regions.prev,
    995					struct nosave_region, list);
    996		if (region->end_pfn == start_pfn) {
    997			region->end_pfn = end_pfn;
    998			goto Report;
    999		}
   1000	}
   1001	/* This allocation cannot fail */
   1002	region = memblock_alloc(sizeof(struct nosave_region),
   1003				SMP_CACHE_BYTES);
   1004	if (!region)
   1005		panic("%s: Failed to allocate %zu bytes\n", __func__,
   1006		      sizeof(struct nosave_region));
   1007	region->start_pfn = start_pfn;
   1008	region->end_pfn = end_pfn;
   1009	list_add_tail(&region->list, &nosave_regions);
   1010 Report:
   1011	pr_info("Registered nosave memory: [mem %#010llx-%#010llx]\n",
   1012		(unsigned long long) start_pfn << PAGE_SHIFT,
   1013		((unsigned long long) end_pfn << PAGE_SHIFT) - 1);
   1014}
   1015
   1016/*
   1017 * Set bits in this map correspond to the page frames the contents of which
   1018 * should not be saved during the suspend.
   1019 */
   1020static struct memory_bitmap *forbidden_pages_map;
   1021
   1022/* Set bits in this map correspond to free page frames. */
   1023static struct memory_bitmap *free_pages_map;
   1024
   1025/*
   1026 * Each page frame allocated for creating the image is marked by setting the
   1027 * corresponding bits in forbidden_pages_map and free_pages_map simultaneously
   1028 */
   1029
   1030void swsusp_set_page_free(struct page *page)
   1031{
   1032	if (free_pages_map)
   1033		memory_bm_set_bit(free_pages_map, page_to_pfn(page));
   1034}
   1035
   1036static int swsusp_page_is_free(struct page *page)
   1037{
   1038	return free_pages_map ?
   1039		memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0;
   1040}
   1041
   1042void swsusp_unset_page_free(struct page *page)
   1043{
   1044	if (free_pages_map)
   1045		memory_bm_clear_bit(free_pages_map, page_to_pfn(page));
   1046}
   1047
   1048static void swsusp_set_page_forbidden(struct page *page)
   1049{
   1050	if (forbidden_pages_map)
   1051		memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page));
   1052}
   1053
   1054int swsusp_page_is_forbidden(struct page *page)
   1055{
   1056	return forbidden_pages_map ?
   1057		memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0;
   1058}
   1059
   1060static void swsusp_unset_page_forbidden(struct page *page)
   1061{
   1062	if (forbidden_pages_map)
   1063		memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page));
   1064}
   1065
   1066/**
   1067 * mark_nosave_pages - Mark pages that should not be saved.
   1068 * @bm: Memory bitmap.
   1069 *
   1070 * Set the bits in @bm that correspond to the page frames the contents of which
   1071 * should not be saved.
   1072 */
   1073static void mark_nosave_pages(struct memory_bitmap *bm)
   1074{
   1075	struct nosave_region *region;
   1076
   1077	if (list_empty(&nosave_regions))
   1078		return;
   1079
   1080	list_for_each_entry(region, &nosave_regions, list) {
   1081		unsigned long pfn;
   1082
   1083		pr_debug("Marking nosave pages: [mem %#010llx-%#010llx]\n",
   1084			 (unsigned long long) region->start_pfn << PAGE_SHIFT,
   1085			 ((unsigned long long) region->end_pfn << PAGE_SHIFT)
   1086				- 1);
   1087
   1088		for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
   1089			if (pfn_valid(pfn)) {
   1090				/*
   1091				 * It is safe to ignore the result of
   1092				 * mem_bm_set_bit_check() here, since we won't
   1093				 * touch the PFNs for which the error is
   1094				 * returned anyway.
   1095				 */
   1096				mem_bm_set_bit_check(bm, pfn);
   1097			}
   1098	}
   1099}
   1100
   1101/**
   1102 * create_basic_memory_bitmaps - Create bitmaps to hold basic page information.
   1103 *
   1104 * Create bitmaps needed for marking page frames that should not be saved and
   1105 * free page frames.  The forbidden_pages_map and free_pages_map pointers are
   1106 * only modified if everything goes well, because we don't want the bits to be
   1107 * touched before both bitmaps are set up.
   1108 */
   1109int create_basic_memory_bitmaps(void)
   1110{
   1111	struct memory_bitmap *bm1, *bm2;
   1112	int error = 0;
   1113
   1114	if (forbidden_pages_map && free_pages_map)
   1115		return 0;
   1116	else
   1117		BUG_ON(forbidden_pages_map || free_pages_map);
   1118
   1119	bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
   1120	if (!bm1)
   1121		return -ENOMEM;
   1122
   1123	error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY);
   1124	if (error)
   1125		goto Free_first_object;
   1126
   1127	bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
   1128	if (!bm2)
   1129		goto Free_first_bitmap;
   1130
   1131	error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY);
   1132	if (error)
   1133		goto Free_second_object;
   1134
   1135	forbidden_pages_map = bm1;
   1136	free_pages_map = bm2;
   1137	mark_nosave_pages(forbidden_pages_map);
   1138
   1139	pr_debug("Basic memory bitmaps created\n");
   1140
   1141	return 0;
   1142
   1143 Free_second_object:
   1144	kfree(bm2);
   1145 Free_first_bitmap:
   1146	memory_bm_free(bm1, PG_UNSAFE_CLEAR);
   1147 Free_first_object:
   1148	kfree(bm1);
   1149	return -ENOMEM;
   1150}
   1151
   1152/**
   1153 * free_basic_memory_bitmaps - Free memory bitmaps holding basic information.
   1154 *
   1155 * Free memory bitmaps allocated by create_basic_memory_bitmaps().  The
   1156 * auxiliary pointers are necessary so that the bitmaps themselves are not
   1157 * referred to while they are being freed.
   1158 */
   1159void free_basic_memory_bitmaps(void)
   1160{
   1161	struct memory_bitmap *bm1, *bm2;
   1162
   1163	if (WARN_ON(!(forbidden_pages_map && free_pages_map)))
   1164		return;
   1165
   1166	bm1 = forbidden_pages_map;
   1167	bm2 = free_pages_map;
   1168	forbidden_pages_map = NULL;
   1169	free_pages_map = NULL;
   1170	memory_bm_free(bm1, PG_UNSAFE_CLEAR);
   1171	kfree(bm1);
   1172	memory_bm_free(bm2, PG_UNSAFE_CLEAR);
   1173	kfree(bm2);
   1174
   1175	pr_debug("Basic memory bitmaps freed\n");
   1176}
   1177
   1178static void clear_or_poison_free_page(struct page *page)
   1179{
   1180	if (page_poisoning_enabled_static())
   1181		__kernel_poison_pages(page, 1);
   1182	else if (want_init_on_free())
   1183		clear_highpage(page);
   1184}
   1185
   1186void clear_or_poison_free_pages(void)
   1187{
   1188	struct memory_bitmap *bm = free_pages_map;
   1189	unsigned long pfn;
   1190
   1191	if (WARN_ON(!(free_pages_map)))
   1192		return;
   1193
   1194	if (page_poisoning_enabled() || want_init_on_free()) {
   1195		memory_bm_position_reset(bm);
   1196		pfn = memory_bm_next_pfn(bm);
   1197		while (pfn != BM_END_OF_MAP) {
   1198			if (pfn_valid(pfn))
   1199				clear_or_poison_free_page(pfn_to_page(pfn));
   1200
   1201			pfn = memory_bm_next_pfn(bm);
   1202		}
   1203		memory_bm_position_reset(bm);
   1204		pr_info("free pages cleared after restore\n");
   1205	}
   1206}
   1207
   1208/**
   1209 * snapshot_additional_pages - Estimate the number of extra pages needed.
   1210 * @zone: Memory zone to carry out the computation for.
   1211 *
   1212 * Estimate the number of additional pages needed for setting up a hibernation
   1213 * image data structures for @zone (usually, the returned value is greater than
   1214 * the exact number).
   1215 */
   1216unsigned int snapshot_additional_pages(struct zone *zone)
   1217{
   1218	unsigned int rtree, nodes;
   1219
   1220	rtree = nodes = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
   1221	rtree += DIV_ROUND_UP(rtree * sizeof(struct rtree_node),
   1222			      LINKED_PAGE_DATA_SIZE);
   1223	while (nodes > 1) {
   1224		nodes = DIV_ROUND_UP(nodes, BM_ENTRIES_PER_LEVEL);
   1225		rtree += nodes;
   1226	}
   1227
   1228	return 2 * rtree;
   1229}
   1230
   1231#ifdef CONFIG_HIGHMEM
   1232/**
   1233 * count_free_highmem_pages - Compute the total number of free highmem pages.
   1234 *
   1235 * The returned number is system-wide.
   1236 */
   1237static unsigned int count_free_highmem_pages(void)
   1238{
   1239	struct zone *zone;
   1240	unsigned int cnt = 0;
   1241
   1242	for_each_populated_zone(zone)
   1243		if (is_highmem(zone))
   1244			cnt += zone_page_state(zone, NR_FREE_PAGES);
   1245
   1246	return cnt;
   1247}
   1248
   1249/**
   1250 * saveable_highmem_page - Check if a highmem page is saveable.
   1251 *
   1252 * Determine whether a highmem page should be included in a hibernation image.
   1253 *
   1254 * We should save the page if it isn't Nosave or NosaveFree, or Reserved,
   1255 * and it isn't part of a free chunk of pages.
   1256 */
   1257static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
   1258{
   1259	struct page *page;
   1260
   1261	if (!pfn_valid(pfn))
   1262		return NULL;
   1263
   1264	page = pfn_to_online_page(pfn);
   1265	if (!page || page_zone(page) != zone)
   1266		return NULL;
   1267
   1268	BUG_ON(!PageHighMem(page));
   1269
   1270	if (swsusp_page_is_forbidden(page) ||  swsusp_page_is_free(page))
   1271		return NULL;
   1272
   1273	if (PageReserved(page) || PageOffline(page))
   1274		return NULL;
   1275
   1276	if (page_is_guard(page))
   1277		return NULL;
   1278
   1279	return page;
   1280}
   1281
   1282/**
   1283 * count_highmem_pages - Compute the total number of saveable highmem pages.
   1284 */
   1285static unsigned int count_highmem_pages(void)
   1286{
   1287	struct zone *zone;
   1288	unsigned int n = 0;
   1289
   1290	for_each_populated_zone(zone) {
   1291		unsigned long pfn, max_zone_pfn;
   1292
   1293		if (!is_highmem(zone))
   1294			continue;
   1295
   1296		mark_free_pages(zone);
   1297		max_zone_pfn = zone_end_pfn(zone);
   1298		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
   1299			if (saveable_highmem_page(zone, pfn))
   1300				n++;
   1301	}
   1302	return n;
   1303}
   1304#else
   1305static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
   1306{
   1307	return NULL;
   1308}
   1309#endif /* CONFIG_HIGHMEM */
   1310
   1311/**
   1312 * saveable_page - Check if the given page is saveable.
   1313 *
   1314 * Determine whether a non-highmem page should be included in a hibernation
   1315 * image.
   1316 *
   1317 * We should save the page if it isn't Nosave, and is not in the range
   1318 * of pages statically defined as 'unsaveable', and it isn't part of
   1319 * a free chunk of pages.
   1320 */
   1321static struct page *saveable_page(struct zone *zone, unsigned long pfn)
   1322{
   1323	struct page *page;
   1324
   1325	if (!pfn_valid(pfn))
   1326		return NULL;
   1327
   1328	page = pfn_to_online_page(pfn);
   1329	if (!page || page_zone(page) != zone)
   1330		return NULL;
   1331
   1332	BUG_ON(PageHighMem(page));
   1333
   1334	if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page))
   1335		return NULL;
   1336
   1337	if (PageOffline(page))
   1338		return NULL;
   1339
   1340	if (PageReserved(page)
   1341	    && (!kernel_page_present(page) || pfn_is_nosave(pfn)))
   1342		return NULL;
   1343
   1344	if (page_is_guard(page))
   1345		return NULL;
   1346
   1347	return page;
   1348}
   1349
   1350/**
   1351 * count_data_pages - Compute the total number of saveable non-highmem pages.
   1352 */
   1353static unsigned int count_data_pages(void)
   1354{
   1355	struct zone *zone;
   1356	unsigned long pfn, max_zone_pfn;
   1357	unsigned int n = 0;
   1358
   1359	for_each_populated_zone(zone) {
   1360		if (is_highmem(zone))
   1361			continue;
   1362
   1363		mark_free_pages(zone);
   1364		max_zone_pfn = zone_end_pfn(zone);
   1365		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
   1366			if (saveable_page(zone, pfn))
   1367				n++;
   1368	}
   1369	return n;
   1370}
   1371
   1372/*
   1373 * This is needed, because copy_page and memcpy are not usable for copying
   1374 * task structs.
   1375 */
   1376static inline void do_copy_page(long *dst, long *src)
   1377{
   1378	int n;
   1379
   1380	for (n = PAGE_SIZE / sizeof(long); n; n--)
   1381		*dst++ = *src++;
   1382}
   1383
   1384/**
   1385 * safe_copy_page - Copy a page in a safe way.
   1386 *
   1387 * Check if the page we are going to copy is marked as present in the kernel
   1388 * page tables. This always is the case if CONFIG_DEBUG_PAGEALLOC or
   1389 * CONFIG_ARCH_HAS_SET_DIRECT_MAP is not set. In that case kernel_page_present()
   1390 * always returns 'true'.
   1391 */
   1392static void safe_copy_page(void *dst, struct page *s_page)
   1393{
   1394	if (kernel_page_present(s_page)) {
   1395		do_copy_page(dst, page_address(s_page));
   1396	} else {
   1397		hibernate_map_page(s_page);
   1398		do_copy_page(dst, page_address(s_page));
   1399		hibernate_unmap_page(s_page);
   1400	}
   1401}
   1402
   1403#ifdef CONFIG_HIGHMEM
   1404static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn)
   1405{
   1406	return is_highmem(zone) ?
   1407		saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
   1408}
   1409
   1410static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
   1411{
   1412	struct page *s_page, *d_page;
   1413	void *src, *dst;
   1414
   1415	s_page = pfn_to_page(src_pfn);
   1416	d_page = pfn_to_page(dst_pfn);
   1417	if (PageHighMem(s_page)) {
   1418		src = kmap_atomic(s_page);
   1419		dst = kmap_atomic(d_page);
   1420		do_copy_page(dst, src);
   1421		kunmap_atomic(dst);
   1422		kunmap_atomic(src);
   1423	} else {
   1424		if (PageHighMem(d_page)) {
   1425			/*
   1426			 * The page pointed to by src may contain some kernel
   1427			 * data modified by kmap_atomic()
   1428			 */
   1429			safe_copy_page(buffer, s_page);
   1430			dst = kmap_atomic(d_page);
   1431			copy_page(dst, buffer);
   1432			kunmap_atomic(dst);
   1433		} else {
   1434			safe_copy_page(page_address(d_page), s_page);
   1435		}
   1436	}
   1437}
   1438#else
   1439#define page_is_saveable(zone, pfn)	saveable_page(zone, pfn)
   1440
   1441static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
   1442{
   1443	safe_copy_page(page_address(pfn_to_page(dst_pfn)),
   1444				pfn_to_page(src_pfn));
   1445}
   1446#endif /* CONFIG_HIGHMEM */
   1447
   1448static void copy_data_pages(struct memory_bitmap *copy_bm,
   1449			    struct memory_bitmap *orig_bm)
   1450{
   1451	struct zone *zone;
   1452	unsigned long pfn;
   1453
   1454	for_each_populated_zone(zone) {
   1455		unsigned long max_zone_pfn;
   1456
   1457		mark_free_pages(zone);
   1458		max_zone_pfn = zone_end_pfn(zone);
   1459		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
   1460			if (page_is_saveable(zone, pfn))
   1461				memory_bm_set_bit(orig_bm, pfn);
   1462	}
   1463	memory_bm_position_reset(orig_bm);
   1464	memory_bm_position_reset(copy_bm);
   1465	for(;;) {
   1466		pfn = memory_bm_next_pfn(orig_bm);
   1467		if (unlikely(pfn == BM_END_OF_MAP))
   1468			break;
   1469		copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
   1470	}
   1471}
   1472
   1473/* Total number of image pages */
   1474static unsigned int nr_copy_pages;
   1475/* Number of pages needed for saving the original pfns of the image pages */
   1476static unsigned int nr_meta_pages;
   1477/*
   1478 * Numbers of normal and highmem page frames allocated for hibernation image
   1479 * before suspending devices.
   1480 */
   1481static unsigned int alloc_normal, alloc_highmem;
   1482/*
   1483 * Memory bitmap used for marking saveable pages (during hibernation) or
   1484 * hibernation image pages (during restore)
   1485 */
   1486static struct memory_bitmap orig_bm;
   1487/*
   1488 * Memory bitmap used during hibernation for marking allocated page frames that
   1489 * will contain copies of saveable pages.  During restore it is initially used
   1490 * for marking hibernation image pages, but then the set bits from it are
   1491 * duplicated in @orig_bm and it is released.  On highmem systems it is next
   1492 * used for marking "safe" highmem pages, but it has to be reinitialized for
   1493 * this purpose.
   1494 */
   1495static struct memory_bitmap copy_bm;
   1496
   1497/**
   1498 * swsusp_free - Free pages allocated for hibernation image.
   1499 *
   1500 * Image pages are allocated before snapshot creation, so they need to be
   1501 * released after resume.
   1502 */
   1503void swsusp_free(void)
   1504{
   1505	unsigned long fb_pfn, fr_pfn;
   1506
   1507	if (!forbidden_pages_map || !free_pages_map)
   1508		goto out;
   1509
   1510	memory_bm_position_reset(forbidden_pages_map);
   1511	memory_bm_position_reset(free_pages_map);
   1512
   1513loop:
   1514	fr_pfn = memory_bm_next_pfn(free_pages_map);
   1515	fb_pfn = memory_bm_next_pfn(forbidden_pages_map);
   1516
   1517	/*
   1518	 * Find the next bit set in both bitmaps. This is guaranteed to
   1519	 * terminate when fb_pfn == fr_pfn == BM_END_OF_MAP.
   1520	 */
   1521	do {
   1522		if (fb_pfn < fr_pfn)
   1523			fb_pfn = memory_bm_next_pfn(forbidden_pages_map);
   1524		if (fr_pfn < fb_pfn)
   1525			fr_pfn = memory_bm_next_pfn(free_pages_map);
   1526	} while (fb_pfn != fr_pfn);
   1527
   1528	if (fr_pfn != BM_END_OF_MAP && pfn_valid(fr_pfn)) {
   1529		struct page *page = pfn_to_page(fr_pfn);
   1530
   1531		memory_bm_clear_current(forbidden_pages_map);
   1532		memory_bm_clear_current(free_pages_map);
   1533		hibernate_restore_unprotect_page(page_address(page));
   1534		__free_page(page);
   1535		goto loop;
   1536	}
   1537
   1538out:
   1539	nr_copy_pages = 0;
   1540	nr_meta_pages = 0;
   1541	restore_pblist = NULL;
   1542	buffer = NULL;
   1543	alloc_normal = 0;
   1544	alloc_highmem = 0;
   1545	hibernate_restore_protection_end();
   1546}
   1547
   1548/* Helper functions used for the shrinking of memory. */
   1549
   1550#define GFP_IMAGE	(GFP_KERNEL | __GFP_NOWARN)
   1551
   1552/**
   1553 * preallocate_image_pages - Allocate a number of pages for hibernation image.
   1554 * @nr_pages: Number of page frames to allocate.
   1555 * @mask: GFP flags to use for the allocation.
   1556 *
   1557 * Return value: Number of page frames actually allocated
   1558 */
   1559static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask)
   1560{
   1561	unsigned long nr_alloc = 0;
   1562
   1563	while (nr_pages > 0) {
   1564		struct page *page;
   1565
   1566		page = alloc_image_page(mask);
   1567		if (!page)
   1568			break;
   1569		memory_bm_set_bit(&copy_bm, page_to_pfn(page));
   1570		if (PageHighMem(page))
   1571			alloc_highmem++;
   1572		else
   1573			alloc_normal++;
   1574		nr_pages--;
   1575		nr_alloc++;
   1576	}
   1577
   1578	return nr_alloc;
   1579}
   1580
   1581static unsigned long preallocate_image_memory(unsigned long nr_pages,
   1582					      unsigned long avail_normal)
   1583{
   1584	unsigned long alloc;
   1585
   1586	if (avail_normal <= alloc_normal)
   1587		return 0;
   1588
   1589	alloc = avail_normal - alloc_normal;
   1590	if (nr_pages < alloc)
   1591		alloc = nr_pages;
   1592
   1593	return preallocate_image_pages(alloc, GFP_IMAGE);
   1594}
   1595
   1596#ifdef CONFIG_HIGHMEM
   1597static unsigned long preallocate_image_highmem(unsigned long nr_pages)
   1598{
   1599	return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM);
   1600}
   1601
   1602/**
   1603 *  __fraction - Compute (an approximation of) x * (multiplier / base).
   1604 */
   1605static unsigned long __fraction(u64 x, u64 multiplier, u64 base)
   1606{
   1607	return div64_u64(x * multiplier, base);
   1608}
   1609
   1610static unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
   1611						  unsigned long highmem,
   1612						  unsigned long total)
   1613{
   1614	unsigned long alloc = __fraction(nr_pages, highmem, total);
   1615
   1616	return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM);
   1617}
   1618#else /* CONFIG_HIGHMEM */
   1619static inline unsigned long preallocate_image_highmem(unsigned long nr_pages)
   1620{
   1621	return 0;
   1622}
   1623
   1624static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
   1625							 unsigned long highmem,
   1626							 unsigned long total)
   1627{
   1628	return 0;
   1629}
   1630#endif /* CONFIG_HIGHMEM */
   1631
   1632/**
   1633 * free_unnecessary_pages - Release preallocated pages not needed for the image.
   1634 */
   1635static unsigned long free_unnecessary_pages(void)
   1636{
   1637	unsigned long save, to_free_normal, to_free_highmem, free;
   1638
   1639	save = count_data_pages();
   1640	if (alloc_normal >= save) {
   1641		to_free_normal = alloc_normal - save;
   1642		save = 0;
   1643	} else {
   1644		to_free_normal = 0;
   1645		save -= alloc_normal;
   1646	}
   1647	save += count_highmem_pages();
   1648	if (alloc_highmem >= save) {
   1649		to_free_highmem = alloc_highmem - save;
   1650	} else {
   1651		to_free_highmem = 0;
   1652		save -= alloc_highmem;
   1653		if (to_free_normal > save)
   1654			to_free_normal -= save;
   1655		else
   1656			to_free_normal = 0;
   1657	}
   1658	free = to_free_normal + to_free_highmem;
   1659
   1660	memory_bm_position_reset(&copy_bm);
   1661
   1662	while (to_free_normal > 0 || to_free_highmem > 0) {
   1663		unsigned long pfn = memory_bm_next_pfn(&copy_bm);
   1664		struct page *page = pfn_to_page(pfn);
   1665
   1666		if (PageHighMem(page)) {
   1667			if (!to_free_highmem)
   1668				continue;
   1669			to_free_highmem--;
   1670			alloc_highmem--;
   1671		} else {
   1672			if (!to_free_normal)
   1673				continue;
   1674			to_free_normal--;
   1675			alloc_normal--;
   1676		}
   1677		memory_bm_clear_bit(&copy_bm, pfn);
   1678		swsusp_unset_page_forbidden(page);
   1679		swsusp_unset_page_free(page);
   1680		__free_page(page);
   1681	}
   1682
   1683	return free;
   1684}
   1685
   1686/**
   1687 * minimum_image_size - Estimate the minimum acceptable size of an image.
   1688 * @saveable: Number of saveable pages in the system.
   1689 *
   1690 * We want to avoid attempting to free too much memory too hard, so estimate the
   1691 * minimum acceptable size of a hibernation image to use as the lower limit for
   1692 * preallocating memory.
   1693 *
   1694 * We assume that the minimum image size should be proportional to
   1695 *
   1696 * [number of saveable pages] - [number of pages that can be freed in theory]
   1697 *
   1698 * where the second term is the sum of (1) reclaimable slab pages, (2) active
   1699 * and (3) inactive anonymous pages, (4) active and (5) inactive file pages.
   1700 */
   1701static unsigned long minimum_image_size(unsigned long saveable)
   1702{
   1703	unsigned long size;
   1704
   1705	size = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B)
   1706		+ global_node_page_state(NR_ACTIVE_ANON)
   1707		+ global_node_page_state(NR_INACTIVE_ANON)
   1708		+ global_node_page_state(NR_ACTIVE_FILE)
   1709		+ global_node_page_state(NR_INACTIVE_FILE);
   1710
   1711	return saveable <= size ? 0 : saveable - size;
   1712}
   1713
   1714/**
   1715 * hibernate_preallocate_memory - Preallocate memory for hibernation image.
   1716 *
   1717 * To create a hibernation image it is necessary to make a copy of every page
   1718 * frame in use.  We also need a number of page frames to be free during
   1719 * hibernation for allocations made while saving the image and for device
   1720 * drivers, in case they need to allocate memory from their hibernation
   1721 * callbacks (these two numbers are given by PAGES_FOR_IO (which is a rough
   1722 * estimate) and reserved_size divided by PAGE_SIZE (which is tunable through
   1723 * /sys/power/reserved_size, respectively).  To make this happen, we compute the
   1724 * total number of available page frames and allocate at least
   1725 *
   1726 * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2
   1727 *  + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE)
   1728 *
   1729 * of them, which corresponds to the maximum size of a hibernation image.
   1730 *
   1731 * If image_size is set below the number following from the above formula,
   1732 * the preallocation of memory is continued until the total number of saveable
   1733 * pages in the system is below the requested image size or the minimum
   1734 * acceptable image size returned by minimum_image_size(), whichever is greater.
   1735 */
   1736int hibernate_preallocate_memory(void)
   1737{
   1738	struct zone *zone;
   1739	unsigned long saveable, size, max_size, count, highmem, pages = 0;
   1740	unsigned long alloc, save_highmem, pages_highmem, avail_normal;
   1741	ktime_t start, stop;
   1742	int error;
   1743
   1744	pr_info("Preallocating image memory\n");
   1745	start = ktime_get();
   1746
   1747	error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY);
   1748	if (error) {
   1749		pr_err("Cannot allocate original bitmap\n");
   1750		goto err_out;
   1751	}
   1752
   1753	error = memory_bm_create(&copy_bm, GFP_IMAGE, PG_ANY);
   1754	if (error) {
   1755		pr_err("Cannot allocate copy bitmap\n");
   1756		goto err_out;
   1757	}
   1758
   1759	alloc_normal = 0;
   1760	alloc_highmem = 0;
   1761
   1762	/* Count the number of saveable data pages. */
   1763	save_highmem = count_highmem_pages();
   1764	saveable = count_data_pages();
   1765
   1766	/*
   1767	 * Compute the total number of page frames we can use (count) and the
   1768	 * number of pages needed for image metadata (size).
   1769	 */
   1770	count = saveable;
   1771	saveable += save_highmem;
   1772	highmem = save_highmem;
   1773	size = 0;
   1774	for_each_populated_zone(zone) {
   1775		size += snapshot_additional_pages(zone);
   1776		if (is_highmem(zone))
   1777			highmem += zone_page_state(zone, NR_FREE_PAGES);
   1778		else
   1779			count += zone_page_state(zone, NR_FREE_PAGES);
   1780	}
   1781	avail_normal = count;
   1782	count += highmem;
   1783	count -= totalreserve_pages;
   1784
   1785	/* Compute the maximum number of saveable pages to leave in memory. */
   1786	max_size = (count - (size + PAGES_FOR_IO)) / 2
   1787			- 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE);
   1788	/* Compute the desired number of image pages specified by image_size. */
   1789	size = DIV_ROUND_UP(image_size, PAGE_SIZE);
   1790	if (size > max_size)
   1791		size = max_size;
   1792	/*
   1793	 * If the desired number of image pages is at least as large as the
   1794	 * current number of saveable pages in memory, allocate page frames for
   1795	 * the image and we're done.
   1796	 */
   1797	if (size >= saveable) {
   1798		pages = preallocate_image_highmem(save_highmem);
   1799		pages += preallocate_image_memory(saveable - pages, avail_normal);
   1800		goto out;
   1801	}
   1802
   1803	/* Estimate the minimum size of the image. */
   1804	pages = minimum_image_size(saveable);
   1805	/*
   1806	 * To avoid excessive pressure on the normal zone, leave room in it to
   1807	 * accommodate an image of the minimum size (unless it's already too
   1808	 * small, in which case don't preallocate pages from it at all).
   1809	 */
   1810	if (avail_normal > pages)
   1811		avail_normal -= pages;
   1812	else
   1813		avail_normal = 0;
   1814	if (size < pages)
   1815		size = min_t(unsigned long, pages, max_size);
   1816
   1817	/*
   1818	 * Let the memory management subsystem know that we're going to need a
   1819	 * large number of page frames to allocate and make it free some memory.
   1820	 * NOTE: If this is not done, performance will be hurt badly in some
   1821	 * test cases.
   1822	 */
   1823	shrink_all_memory(saveable - size);
   1824
   1825	/*
   1826	 * The number of saveable pages in memory was too high, so apply some
   1827	 * pressure to decrease it.  First, make room for the largest possible
   1828	 * image and fail if that doesn't work.  Next, try to decrease the size
   1829	 * of the image as much as indicated by 'size' using allocations from
   1830	 * highmem and non-highmem zones separately.
   1831	 */
   1832	pages_highmem = preallocate_image_highmem(highmem / 2);
   1833	alloc = count - max_size;
   1834	if (alloc > pages_highmem)
   1835		alloc -= pages_highmem;
   1836	else
   1837		alloc = 0;
   1838	pages = preallocate_image_memory(alloc, avail_normal);
   1839	if (pages < alloc) {
   1840		/* We have exhausted non-highmem pages, try highmem. */
   1841		alloc -= pages;
   1842		pages += pages_highmem;
   1843		pages_highmem = preallocate_image_highmem(alloc);
   1844		if (pages_highmem < alloc) {
   1845			pr_err("Image allocation is %lu pages short\n",
   1846				alloc - pages_highmem);
   1847			goto err_out;
   1848		}
   1849		pages += pages_highmem;
   1850		/*
   1851		 * size is the desired number of saveable pages to leave in
   1852		 * memory, so try to preallocate (all memory - size) pages.
   1853		 */
   1854		alloc = (count - pages) - size;
   1855		pages += preallocate_image_highmem(alloc);
   1856	} else {
   1857		/*
   1858		 * There are approximately max_size saveable pages at this point
   1859		 * and we want to reduce this number down to size.
   1860		 */
   1861		alloc = max_size - size;
   1862		size = preallocate_highmem_fraction(alloc, highmem, count);
   1863		pages_highmem += size;
   1864		alloc -= size;
   1865		size = preallocate_image_memory(alloc, avail_normal);
   1866		pages_highmem += preallocate_image_highmem(alloc - size);
   1867		pages += pages_highmem + size;
   1868	}
   1869
   1870	/*
   1871	 * We only need as many page frames for the image as there are saveable
   1872	 * pages in memory, but we have allocated more.  Release the excessive
   1873	 * ones now.
   1874	 */
   1875	pages -= free_unnecessary_pages();
   1876
   1877 out:
   1878	stop = ktime_get();
   1879	pr_info("Allocated %lu pages for snapshot\n", pages);
   1880	swsusp_show_speed(start, stop, pages, "Allocated");
   1881
   1882	return 0;
   1883
   1884 err_out:
   1885	swsusp_free();
   1886	return -ENOMEM;
   1887}
   1888
   1889#ifdef CONFIG_HIGHMEM
   1890/**
   1891 * count_pages_for_highmem - Count non-highmem pages needed for copying highmem.
   1892 *
   1893 * Compute the number of non-highmem pages that will be necessary for creating
   1894 * copies of highmem pages.
   1895 */
   1896static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
   1897{
   1898	unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem;
   1899
   1900	if (free_highmem >= nr_highmem)
   1901		nr_highmem = 0;
   1902	else
   1903		nr_highmem -= free_highmem;
   1904
   1905	return nr_highmem;
   1906}
   1907#else
   1908static unsigned int count_pages_for_highmem(unsigned int nr_highmem) { return 0; }
   1909#endif /* CONFIG_HIGHMEM */
   1910
   1911/**
   1912 * enough_free_mem - Check if there is enough free memory for the image.
   1913 */
   1914static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
   1915{
   1916	struct zone *zone;
   1917	unsigned int free = alloc_normal;
   1918
   1919	for_each_populated_zone(zone)
   1920		if (!is_highmem(zone))
   1921			free += zone_page_state(zone, NR_FREE_PAGES);
   1922
   1923	nr_pages += count_pages_for_highmem(nr_highmem);
   1924	pr_debug("Normal pages needed: %u + %u, available pages: %u\n",
   1925		 nr_pages, PAGES_FOR_IO, free);
   1926
   1927	return free > nr_pages + PAGES_FOR_IO;
   1928}
   1929
   1930#ifdef CONFIG_HIGHMEM
   1931/**
   1932 * get_highmem_buffer - Allocate a buffer for highmem pages.
   1933 *
   1934 * If there are some highmem pages in the hibernation image, we may need a
   1935 * buffer to copy them and/or load their data.
   1936 */
   1937static inline int get_highmem_buffer(int safe_needed)
   1938{
   1939	buffer = get_image_page(GFP_ATOMIC, safe_needed);
   1940	return buffer ? 0 : -ENOMEM;
   1941}
   1942
   1943/**
   1944 * alloc_highmem_pages - Allocate some highmem pages for the image.
   1945 *
   1946 * Try to allocate as many pages as needed, but if the number of free highmem
   1947 * pages is less than that, allocate them all.
   1948 */
   1949static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm,
   1950					       unsigned int nr_highmem)
   1951{
   1952	unsigned int to_alloc = count_free_highmem_pages();
   1953
   1954	if (to_alloc > nr_highmem)
   1955		to_alloc = nr_highmem;
   1956
   1957	nr_highmem -= to_alloc;
   1958	while (to_alloc-- > 0) {
   1959		struct page *page;
   1960
   1961		page = alloc_image_page(__GFP_HIGHMEM|__GFP_KSWAPD_RECLAIM);
   1962		memory_bm_set_bit(bm, page_to_pfn(page));
   1963	}
   1964	return nr_highmem;
   1965}
   1966#else
   1967static inline int get_highmem_buffer(int safe_needed) { return 0; }
   1968
   1969static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm,
   1970					       unsigned int n) { return 0; }
   1971#endif /* CONFIG_HIGHMEM */
   1972
   1973/**
   1974 * swsusp_alloc - Allocate memory for hibernation image.
   1975 *
   1976 * We first try to allocate as many highmem pages as there are
   1977 * saveable highmem pages in the system.  If that fails, we allocate
   1978 * non-highmem pages for the copies of the remaining highmem ones.
   1979 *
   1980 * In this approach it is likely that the copies of highmem pages will
   1981 * also be located in the high memory, because of the way in which
   1982 * copy_data_pages() works.
   1983 */
   1984static int swsusp_alloc(struct memory_bitmap *copy_bm,
   1985			unsigned int nr_pages, unsigned int nr_highmem)
   1986{
   1987	if (nr_highmem > 0) {
   1988		if (get_highmem_buffer(PG_ANY))
   1989			goto err_out;
   1990		if (nr_highmem > alloc_highmem) {
   1991			nr_highmem -= alloc_highmem;
   1992			nr_pages += alloc_highmem_pages(copy_bm, nr_highmem);
   1993		}
   1994	}
   1995	if (nr_pages > alloc_normal) {
   1996		nr_pages -= alloc_normal;
   1997		while (nr_pages-- > 0) {
   1998			struct page *page;
   1999
   2000			page = alloc_image_page(GFP_ATOMIC);
   2001			if (!page)
   2002				goto err_out;
   2003			memory_bm_set_bit(copy_bm, page_to_pfn(page));
   2004		}
   2005	}
   2006
   2007	return 0;
   2008
   2009 err_out:
   2010	swsusp_free();
   2011	return -ENOMEM;
   2012}
   2013
   2014asmlinkage __visible int swsusp_save(void)
   2015{
   2016	unsigned int nr_pages, nr_highmem;
   2017
   2018	pr_info("Creating image:\n");
   2019
   2020	drain_local_pages(NULL);
   2021	nr_pages = count_data_pages();
   2022	nr_highmem = count_highmem_pages();
   2023	pr_info("Need to copy %u pages\n", nr_pages + nr_highmem);
   2024
   2025	if (!enough_free_mem(nr_pages, nr_highmem)) {
   2026		pr_err("Not enough free memory\n");
   2027		return -ENOMEM;
   2028	}
   2029
   2030	if (swsusp_alloc(&copy_bm, nr_pages, nr_highmem)) {
   2031		pr_err("Memory allocation failed\n");
   2032		return -ENOMEM;
   2033	}
   2034
   2035	/*
   2036	 * During allocating of suspend pagedir, new cold pages may appear.
   2037	 * Kill them.
   2038	 */
   2039	drain_local_pages(NULL);
   2040	copy_data_pages(&copy_bm, &orig_bm);
   2041
   2042	/*
   2043	 * End of critical section. From now on, we can write to memory,
   2044	 * but we should not touch disk. This specially means we must _not_
   2045	 * touch swap space! Except we must write out our image of course.
   2046	 */
   2047
   2048	nr_pages += nr_highmem;
   2049	nr_copy_pages = nr_pages;
   2050	nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
   2051
   2052	pr_info("Image created (%d pages copied)\n", nr_pages);
   2053
   2054	return 0;
   2055}
   2056
   2057#ifndef CONFIG_ARCH_HIBERNATION_HEADER
   2058static int init_header_complete(struct swsusp_info *info)
   2059{
   2060	memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname));
   2061	info->version_code = LINUX_VERSION_CODE;
   2062	return 0;
   2063}
   2064
   2065static const char *check_image_kernel(struct swsusp_info *info)
   2066{
   2067	if (info->version_code != LINUX_VERSION_CODE)
   2068		return "kernel version";
   2069	if (strcmp(info->uts.sysname,init_utsname()->sysname))
   2070		return "system type";
   2071	if (strcmp(info->uts.release,init_utsname()->release))
   2072		return "kernel release";
   2073	if (strcmp(info->uts.version,init_utsname()->version))
   2074		return "version";
   2075	if (strcmp(info->uts.machine,init_utsname()->machine))
   2076		return "machine";
   2077	return NULL;
   2078}
   2079#endif /* CONFIG_ARCH_HIBERNATION_HEADER */
   2080
   2081unsigned long snapshot_get_image_size(void)
   2082{
   2083	return nr_copy_pages + nr_meta_pages + 1;
   2084}
   2085
   2086static int init_header(struct swsusp_info *info)
   2087{
   2088	memset(info, 0, sizeof(struct swsusp_info));
   2089	info->num_physpages = get_num_physpages();
   2090	info->image_pages = nr_copy_pages;
   2091	info->pages = snapshot_get_image_size();
   2092	info->size = info->pages;
   2093	info->size <<= PAGE_SHIFT;
   2094	return init_header_complete(info);
   2095}
   2096
   2097/**
   2098 * pack_pfns - Prepare PFNs for saving.
   2099 * @bm: Memory bitmap.
   2100 * @buf: Memory buffer to store the PFNs in.
   2101 *
   2102 * PFNs corresponding to set bits in @bm are stored in the area of memory
   2103 * pointed to by @buf (1 page at a time).
   2104 */
   2105static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
   2106{
   2107	int j;
   2108
   2109	for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
   2110		buf[j] = memory_bm_next_pfn(bm);
   2111		if (unlikely(buf[j] == BM_END_OF_MAP))
   2112			break;
   2113	}
   2114}
   2115
   2116/**
   2117 * snapshot_read_next - Get the address to read the next image page from.
   2118 * @handle: Snapshot handle to be used for the reading.
   2119 *
   2120 * On the first call, @handle should point to a zeroed snapshot_handle
   2121 * structure.  The structure gets populated then and a pointer to it should be
   2122 * passed to this function every next time.
   2123 *
   2124 * On success, the function returns a positive number.  Then, the caller
   2125 * is allowed to read up to the returned number of bytes from the memory
   2126 * location computed by the data_of() macro.
   2127 *
   2128 * The function returns 0 to indicate the end of the data stream condition,
   2129 * and negative numbers are returned on errors.  If that happens, the structure
   2130 * pointed to by @handle is not updated and should not be used any more.
   2131 */
   2132int snapshot_read_next(struct snapshot_handle *handle)
   2133{
   2134	if (handle->cur > nr_meta_pages + nr_copy_pages)
   2135		return 0;
   2136
   2137	if (!buffer) {
   2138		/* This makes the buffer be freed by swsusp_free() */
   2139		buffer = get_image_page(GFP_ATOMIC, PG_ANY);
   2140		if (!buffer)
   2141			return -ENOMEM;
   2142	}
   2143	if (!handle->cur) {
   2144		int error;
   2145
   2146		error = init_header((struct swsusp_info *)buffer);
   2147		if (error)
   2148			return error;
   2149		handle->buffer = buffer;
   2150		memory_bm_position_reset(&orig_bm);
   2151		memory_bm_position_reset(&copy_bm);
   2152	} else if (handle->cur <= nr_meta_pages) {
   2153		clear_page(buffer);
   2154		pack_pfns(buffer, &orig_bm);
   2155	} else {
   2156		struct page *page;
   2157
   2158		page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
   2159		if (PageHighMem(page)) {
   2160			/*
   2161			 * Highmem pages are copied to the buffer,
   2162			 * because we can't return with a kmapped
   2163			 * highmem page (we may not be called again).
   2164			 */
   2165			void *kaddr;
   2166
   2167			kaddr = kmap_atomic(page);
   2168			copy_page(buffer, kaddr);
   2169			kunmap_atomic(kaddr);
   2170			handle->buffer = buffer;
   2171		} else {
   2172			handle->buffer = page_address(page);
   2173		}
   2174	}
   2175	handle->cur++;
   2176	return PAGE_SIZE;
   2177}
   2178
   2179static void duplicate_memory_bitmap(struct memory_bitmap *dst,
   2180				    struct memory_bitmap *src)
   2181{
   2182	unsigned long pfn;
   2183
   2184	memory_bm_position_reset(src);
   2185	pfn = memory_bm_next_pfn(src);
   2186	while (pfn != BM_END_OF_MAP) {
   2187		memory_bm_set_bit(dst, pfn);
   2188		pfn = memory_bm_next_pfn(src);
   2189	}
   2190}
   2191
   2192/**
   2193 * mark_unsafe_pages - Mark pages that were used before hibernation.
   2194 *
   2195 * Mark the pages that cannot be used for storing the image during restoration,
   2196 * because they conflict with the pages that had been used before hibernation.
   2197 */
   2198static void mark_unsafe_pages(struct memory_bitmap *bm)
   2199{
   2200	unsigned long pfn;
   2201
   2202	/* Clear the "free"/"unsafe" bit for all PFNs */
   2203	memory_bm_position_reset(free_pages_map);
   2204	pfn = memory_bm_next_pfn(free_pages_map);
   2205	while (pfn != BM_END_OF_MAP) {
   2206		memory_bm_clear_current(free_pages_map);
   2207		pfn = memory_bm_next_pfn(free_pages_map);
   2208	}
   2209
   2210	/* Mark pages that correspond to the "original" PFNs as "unsafe" */
   2211	duplicate_memory_bitmap(free_pages_map, bm);
   2212
   2213	allocated_unsafe_pages = 0;
   2214}
   2215
   2216static int check_header(struct swsusp_info *info)
   2217{
   2218	const char *reason;
   2219
   2220	reason = check_image_kernel(info);
   2221	if (!reason && info->num_physpages != get_num_physpages())
   2222		reason = "memory size";
   2223	if (reason) {
   2224		pr_err("Image mismatch: %s\n", reason);
   2225		return -EPERM;
   2226	}
   2227	return 0;
   2228}
   2229
   2230/**
   2231 * load_header - Check the image header and copy the data from it.
   2232 */
   2233static int load_header(struct swsusp_info *info)
   2234{
   2235	int error;
   2236
   2237	restore_pblist = NULL;
   2238	error = check_header(info);
   2239	if (!error) {
   2240		nr_copy_pages = info->image_pages;
   2241		nr_meta_pages = info->pages - info->image_pages - 1;
   2242	}
   2243	return error;
   2244}
   2245
   2246/**
   2247 * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap.
   2248 * @bm: Memory bitmap.
   2249 * @buf: Area of memory containing the PFNs.
   2250 *
   2251 * For each element of the array pointed to by @buf (1 page at a time), set the
   2252 * corresponding bit in @bm.
   2253 */
   2254static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
   2255{
   2256	int j;
   2257
   2258	for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
   2259		if (unlikely(buf[j] == BM_END_OF_MAP))
   2260			break;
   2261
   2262		if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j]))
   2263			memory_bm_set_bit(bm, buf[j]);
   2264		else
   2265			return -EFAULT;
   2266	}
   2267
   2268	return 0;
   2269}
   2270
   2271#ifdef CONFIG_HIGHMEM
   2272/*
   2273 * struct highmem_pbe is used for creating the list of highmem pages that
   2274 * should be restored atomically during the resume from disk, because the page
   2275 * frames they have occupied before the suspend are in use.
   2276 */
   2277struct highmem_pbe {
   2278	struct page *copy_page;	/* data is here now */
   2279	struct page *orig_page;	/* data was here before the suspend */
   2280	struct highmem_pbe *next;
   2281};
   2282
   2283/*
   2284 * List of highmem PBEs needed for restoring the highmem pages that were
   2285 * allocated before the suspend and included in the suspend image, but have
   2286 * also been allocated by the "resume" kernel, so their contents cannot be
   2287 * written directly to their "original" page frames.
   2288 */
   2289static struct highmem_pbe *highmem_pblist;
   2290
   2291/**
   2292 * count_highmem_image_pages - Compute the number of highmem pages in the image.
   2293 * @bm: Memory bitmap.
   2294 *
   2295 * The bits in @bm that correspond to image pages are assumed to be set.
   2296 */
   2297static unsigned int count_highmem_image_pages(struct memory_bitmap *bm)
   2298{
   2299	unsigned long pfn;
   2300	unsigned int cnt = 0;
   2301
   2302	memory_bm_position_reset(bm);
   2303	pfn = memory_bm_next_pfn(bm);
   2304	while (pfn != BM_END_OF_MAP) {
   2305		if (PageHighMem(pfn_to_page(pfn)))
   2306			cnt++;
   2307
   2308		pfn = memory_bm_next_pfn(bm);
   2309	}
   2310	return cnt;
   2311}
   2312
   2313static unsigned int safe_highmem_pages;
   2314
   2315static struct memory_bitmap *safe_highmem_bm;
   2316
   2317/**
   2318 * prepare_highmem_image - Allocate memory for loading highmem data from image.
   2319 * @bm: Pointer to an uninitialized memory bitmap structure.
   2320 * @nr_highmem_p: Pointer to the number of highmem image pages.
   2321 *
   2322 * Try to allocate as many highmem pages as there are highmem image pages
   2323 * (@nr_highmem_p points to the variable containing the number of highmem image
   2324 * pages).  The pages that are "safe" (ie. will not be overwritten when the
   2325 * hibernation image is restored entirely) have the corresponding bits set in
   2326 * @bm (it must be uninitialized).
   2327 *
   2328 * NOTE: This function should not be called if there are no highmem image pages.
   2329 */
   2330static int prepare_highmem_image(struct memory_bitmap *bm,
   2331				 unsigned int *nr_highmem_p)
   2332{
   2333	unsigned int to_alloc;
   2334
   2335	if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE))
   2336		return -ENOMEM;
   2337
   2338	if (get_highmem_buffer(PG_SAFE))
   2339		return -ENOMEM;
   2340
   2341	to_alloc = count_free_highmem_pages();
   2342	if (to_alloc > *nr_highmem_p)
   2343		to_alloc = *nr_highmem_p;
   2344	else
   2345		*nr_highmem_p = to_alloc;
   2346
   2347	safe_highmem_pages = 0;
   2348	while (to_alloc-- > 0) {
   2349		struct page *page;
   2350
   2351		page = alloc_page(__GFP_HIGHMEM);
   2352		if (!swsusp_page_is_free(page)) {
   2353			/* The page is "safe", set its bit the bitmap */
   2354			memory_bm_set_bit(bm, page_to_pfn(page));
   2355			safe_highmem_pages++;
   2356		}
   2357		/* Mark the page as allocated */
   2358		swsusp_set_page_forbidden(page);
   2359		swsusp_set_page_free(page);
   2360	}
   2361	memory_bm_position_reset(bm);
   2362	safe_highmem_bm = bm;
   2363	return 0;
   2364}
   2365
   2366static struct page *last_highmem_page;
   2367
   2368/**
   2369 * get_highmem_page_buffer - Prepare a buffer to store a highmem image page.
   2370 *
   2371 * For a given highmem image page get a buffer that suspend_write_next() should
   2372 * return to its caller to write to.
   2373 *
   2374 * If the page is to be saved to its "original" page frame or a copy of
   2375 * the page is to be made in the highmem, @buffer is returned.  Otherwise,
   2376 * the copy of the page is to be made in normal memory, so the address of
   2377 * the copy is returned.
   2378 *
   2379 * If @buffer is returned, the caller of suspend_write_next() will write
   2380 * the page's contents to @buffer, so they will have to be copied to the
   2381 * right location on the next call to suspend_write_next() and it is done
   2382 * with the help of copy_last_highmem_page().  For this purpose, if
   2383 * @buffer is returned, @last_highmem_page is set to the page to which
   2384 * the data will have to be copied from @buffer.
   2385 */
   2386static void *get_highmem_page_buffer(struct page *page,
   2387				     struct chain_allocator *ca)
   2388{
   2389	struct highmem_pbe *pbe;
   2390	void *kaddr;
   2391
   2392	if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) {
   2393		/*
   2394		 * We have allocated the "original" page frame and we can
   2395		 * use it directly to store the loaded page.
   2396		 */
   2397		last_highmem_page = page;
   2398		return buffer;
   2399	}
   2400	/*
   2401	 * The "original" page frame has not been allocated and we have to
   2402	 * use a "safe" page frame to store the loaded page.
   2403	 */
   2404	pbe = chain_alloc(ca, sizeof(struct highmem_pbe));
   2405	if (!pbe) {
   2406		swsusp_free();
   2407		return ERR_PTR(-ENOMEM);
   2408	}
   2409	pbe->orig_page = page;
   2410	if (safe_highmem_pages > 0) {
   2411		struct page *tmp;
   2412
   2413		/* Copy of the page will be stored in high memory */
   2414		kaddr = buffer;
   2415		tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm));
   2416		safe_highmem_pages--;
   2417		last_highmem_page = tmp;
   2418		pbe->copy_page = tmp;
   2419	} else {
   2420		/* Copy of the page will be stored in normal memory */
   2421		kaddr = safe_pages_list;
   2422		safe_pages_list = safe_pages_list->next;
   2423		pbe->copy_page = virt_to_page(kaddr);
   2424	}
   2425	pbe->next = highmem_pblist;
   2426	highmem_pblist = pbe;
   2427	return kaddr;
   2428}
   2429
   2430/**
   2431 * copy_last_highmem_page - Copy most the most recent highmem image page.
   2432 *
   2433 * Copy the contents of a highmem image from @buffer, where the caller of
   2434 * snapshot_write_next() has stored them, to the right location represented by
   2435 * @last_highmem_page .
   2436 */
   2437static void copy_last_highmem_page(void)
   2438{
   2439	if (last_highmem_page) {
   2440		void *dst;
   2441
   2442		dst = kmap_atomic(last_highmem_page);
   2443		copy_page(dst, buffer);
   2444		kunmap_atomic(dst);
   2445		last_highmem_page = NULL;
   2446	}
   2447}
   2448
   2449static inline int last_highmem_page_copied(void)
   2450{
   2451	return !last_highmem_page;
   2452}
   2453
   2454static inline void free_highmem_data(void)
   2455{
   2456	if (safe_highmem_bm)
   2457		memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR);
   2458
   2459	if (buffer)
   2460		free_image_page(buffer, PG_UNSAFE_CLEAR);
   2461}
   2462#else
   2463static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) { return 0; }
   2464
   2465static inline int prepare_highmem_image(struct memory_bitmap *bm,
   2466					unsigned int *nr_highmem_p) { return 0; }
   2467
   2468static inline void *get_highmem_page_buffer(struct page *page,
   2469					    struct chain_allocator *ca)
   2470{
   2471	return ERR_PTR(-EINVAL);
   2472}
   2473
   2474static inline void copy_last_highmem_page(void) {}
   2475static inline int last_highmem_page_copied(void) { return 1; }
   2476static inline void free_highmem_data(void) {}
   2477#endif /* CONFIG_HIGHMEM */
   2478
   2479#define PBES_PER_LINKED_PAGE	(LINKED_PAGE_DATA_SIZE / sizeof(struct pbe))
   2480
   2481/**
   2482 * prepare_image - Make room for loading hibernation image.
   2483 * @new_bm: Uninitialized memory bitmap structure.
   2484 * @bm: Memory bitmap with unsafe pages marked.
   2485 *
   2486 * Use @bm to mark the pages that will be overwritten in the process of
   2487 * restoring the system memory state from the suspend image ("unsafe" pages)
   2488 * and allocate memory for the image.
   2489 *
   2490 * The idea is to allocate a new memory bitmap first and then allocate
   2491 * as many pages as needed for image data, but without specifying what those
   2492 * pages will be used for just yet.  Instead, we mark them all as allocated and
   2493 * create a lists of "safe" pages to be used later.  On systems with high
   2494 * memory a list of "safe" highmem pages is created too.
   2495 */
   2496static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
   2497{
   2498	unsigned int nr_pages, nr_highmem;
   2499	struct linked_page *lp;
   2500	int error;
   2501
   2502	/* If there is no highmem, the buffer will not be necessary */
   2503	free_image_page(buffer, PG_UNSAFE_CLEAR);
   2504	buffer = NULL;
   2505
   2506	nr_highmem = count_highmem_image_pages(bm);
   2507	mark_unsafe_pages(bm);
   2508
   2509	error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE);
   2510	if (error)
   2511		goto Free;
   2512
   2513	duplicate_memory_bitmap(new_bm, bm);
   2514	memory_bm_free(bm, PG_UNSAFE_KEEP);
   2515	if (nr_highmem > 0) {
   2516		error = prepare_highmem_image(bm, &nr_highmem);
   2517		if (error)
   2518			goto Free;
   2519	}
   2520	/*
   2521	 * Reserve some safe pages for potential later use.
   2522	 *
   2523	 * NOTE: This way we make sure there will be enough safe pages for the
   2524	 * chain_alloc() in get_buffer().  It is a bit wasteful, but
   2525	 * nr_copy_pages cannot be greater than 50% of the memory anyway.
   2526	 *
   2527	 * nr_copy_pages cannot be less than allocated_unsafe_pages too.
   2528	 */
   2529	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
   2530	nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
   2531	while (nr_pages > 0) {
   2532		lp = get_image_page(GFP_ATOMIC, PG_SAFE);
   2533		if (!lp) {
   2534			error = -ENOMEM;
   2535			goto Free;
   2536		}
   2537		lp->next = safe_pages_list;
   2538		safe_pages_list = lp;
   2539		nr_pages--;
   2540	}
   2541	/* Preallocate memory for the image */
   2542	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
   2543	while (nr_pages > 0) {
   2544		lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
   2545		if (!lp) {
   2546			error = -ENOMEM;
   2547			goto Free;
   2548		}
   2549		if (!swsusp_page_is_free(virt_to_page(lp))) {
   2550			/* The page is "safe", add it to the list */
   2551			lp->next = safe_pages_list;
   2552			safe_pages_list = lp;
   2553		}
   2554		/* Mark the page as allocated */
   2555		swsusp_set_page_forbidden(virt_to_page(lp));
   2556		swsusp_set_page_free(virt_to_page(lp));
   2557		nr_pages--;
   2558	}
   2559	return 0;
   2560
   2561 Free:
   2562	swsusp_free();
   2563	return error;
   2564}
   2565
   2566/**
   2567 * get_buffer - Get the address to store the next image data page.
   2568 *
   2569 * Get the address that snapshot_write_next() should return to its caller to
   2570 * write to.
   2571 */
   2572static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
   2573{
   2574	struct pbe *pbe;
   2575	struct page *page;
   2576	unsigned long pfn = memory_bm_next_pfn(bm);
   2577
   2578	if (pfn == BM_END_OF_MAP)
   2579		return ERR_PTR(-EFAULT);
   2580
   2581	page = pfn_to_page(pfn);
   2582	if (PageHighMem(page))
   2583		return get_highmem_page_buffer(page, ca);
   2584
   2585	if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page))
   2586		/*
   2587		 * We have allocated the "original" page frame and we can
   2588		 * use it directly to store the loaded page.
   2589		 */
   2590		return page_address(page);
   2591
   2592	/*
   2593	 * The "original" page frame has not been allocated and we have to
   2594	 * use a "safe" page frame to store the loaded page.
   2595	 */
   2596	pbe = chain_alloc(ca, sizeof(struct pbe));
   2597	if (!pbe) {
   2598		swsusp_free();
   2599		return ERR_PTR(-ENOMEM);
   2600	}
   2601	pbe->orig_address = page_address(page);
   2602	pbe->address = safe_pages_list;
   2603	safe_pages_list = safe_pages_list->next;
   2604	pbe->next = restore_pblist;
   2605	restore_pblist = pbe;
   2606	return pbe->address;
   2607}
   2608
   2609/**
   2610 * snapshot_write_next - Get the address to store the next image page.
   2611 * @handle: Snapshot handle structure to guide the writing.
   2612 *
   2613 * On the first call, @handle should point to a zeroed snapshot_handle
   2614 * structure.  The structure gets populated then and a pointer to it should be
   2615 * passed to this function every next time.
   2616 *
   2617 * On success, the function returns a positive number.  Then, the caller
   2618 * is allowed to write up to the returned number of bytes to the memory
   2619 * location computed by the data_of() macro.
   2620 *
   2621 * The function returns 0 to indicate the "end of file" condition.  Negative
   2622 * numbers are returned on errors, in which cases the structure pointed to by
   2623 * @handle is not updated and should not be used any more.
   2624 */
   2625int snapshot_write_next(struct snapshot_handle *handle)
   2626{
   2627	static struct chain_allocator ca;
   2628	int error = 0;
   2629
   2630	/* Check if we have already loaded the entire image */
   2631	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
   2632		return 0;
   2633
   2634	handle->sync_read = 1;
   2635
   2636	if (!handle->cur) {
   2637		if (!buffer)
   2638			/* This makes the buffer be freed by swsusp_free() */
   2639			buffer = get_image_page(GFP_ATOMIC, PG_ANY);
   2640
   2641		if (!buffer)
   2642			return -ENOMEM;
   2643
   2644		handle->buffer = buffer;
   2645	} else if (handle->cur == 1) {
   2646		error = load_header(buffer);
   2647		if (error)
   2648			return error;
   2649
   2650		safe_pages_list = NULL;
   2651
   2652		error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
   2653		if (error)
   2654			return error;
   2655
   2656		hibernate_restore_protection_begin();
   2657	} else if (handle->cur <= nr_meta_pages + 1) {
   2658		error = unpack_orig_pfns(buffer, &copy_bm);
   2659		if (error)
   2660			return error;
   2661
   2662		if (handle->cur == nr_meta_pages + 1) {
   2663			error = prepare_image(&orig_bm, &copy_bm);
   2664			if (error)
   2665				return error;
   2666
   2667			chain_init(&ca, GFP_ATOMIC, PG_SAFE);
   2668			memory_bm_position_reset(&orig_bm);
   2669			restore_pblist = NULL;
   2670			handle->buffer = get_buffer(&orig_bm, &ca);
   2671			handle->sync_read = 0;
   2672			if (IS_ERR(handle->buffer))
   2673				return PTR_ERR(handle->buffer);
   2674		}
   2675	} else {
   2676		copy_last_highmem_page();
   2677		hibernate_restore_protect_page(handle->buffer);
   2678		handle->buffer = get_buffer(&orig_bm, &ca);
   2679		if (IS_ERR(handle->buffer))
   2680			return PTR_ERR(handle->buffer);
   2681		if (handle->buffer != buffer)
   2682			handle->sync_read = 0;
   2683	}
   2684	handle->cur++;
   2685	return PAGE_SIZE;
   2686}
   2687
   2688/**
   2689 * snapshot_write_finalize - Complete the loading of a hibernation image.
   2690 *
   2691 * Must be called after the last call to snapshot_write_next() in case the last
   2692 * page in the image happens to be a highmem page and its contents should be
   2693 * stored in highmem.  Additionally, it recycles bitmap memory that's not
   2694 * necessary any more.
   2695 */
   2696void snapshot_write_finalize(struct snapshot_handle *handle)
   2697{
   2698	copy_last_highmem_page();
   2699	hibernate_restore_protect_page(handle->buffer);
   2700	/* Do that only if we have loaded the image entirely */
   2701	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
   2702		memory_bm_recycle(&orig_bm);
   2703		free_highmem_data();
   2704	}
   2705}
   2706
   2707int snapshot_image_loaded(struct snapshot_handle *handle)
   2708{
   2709	return !(!nr_copy_pages || !last_highmem_page_copied() ||
   2710			handle->cur <= nr_meta_pages + nr_copy_pages);
   2711}
   2712
   2713#ifdef CONFIG_HIGHMEM
   2714/* Assumes that @buf is ready and points to a "safe" page */
   2715static inline void swap_two_pages_data(struct page *p1, struct page *p2,
   2716				       void *buf)
   2717{
   2718	void *kaddr1, *kaddr2;
   2719
   2720	kaddr1 = kmap_atomic(p1);
   2721	kaddr2 = kmap_atomic(p2);
   2722	copy_page(buf, kaddr1);
   2723	copy_page(kaddr1, kaddr2);
   2724	copy_page(kaddr2, buf);
   2725	kunmap_atomic(kaddr2);
   2726	kunmap_atomic(kaddr1);
   2727}
   2728
   2729/**
   2730 * restore_highmem - Put highmem image pages into their original locations.
   2731 *
   2732 * For each highmem page that was in use before hibernation and is included in
   2733 * the image, and also has been allocated by the "restore" kernel, swap its
   2734 * current contents with the previous (ie. "before hibernation") ones.
   2735 *
   2736 * If the restore eventually fails, we can call this function once again and
   2737 * restore the highmem state as seen by the restore kernel.
   2738 */
   2739int restore_highmem(void)
   2740{
   2741	struct highmem_pbe *pbe = highmem_pblist;
   2742	void *buf;
   2743
   2744	if (!pbe)
   2745		return 0;
   2746
   2747	buf = get_image_page(GFP_ATOMIC, PG_SAFE);
   2748	if (!buf)
   2749		return -ENOMEM;
   2750
   2751	while (pbe) {
   2752		swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf);
   2753		pbe = pbe->next;
   2754	}
   2755	free_image_page(buf, PG_UNSAFE_CLEAR);
   2756	return 0;
   2757}
   2758#endif /* CONFIG_HIGHMEM */