Diffstat (limited to 'kernel/power')
-rw-r--r--  kernel/power/Kconfig                        581
-rw-r--r--  kernel/power/Makefile                        47
-rw-r--r--  kernel/power/autosleep.c                    128
-rw-r--r--  kernel/power/block_io.c                     103
-rw-r--r--  kernel/power/console.c                      151
-rw-r--r--  kernel/power/hibernate.c                   1178
-rw-r--r--  kernel/power/main.c                         646
-rw-r--r--  kernel/power/power.h                        335
-rw-r--r--  kernel/power/poweroff.c                      46
-rw-r--r--  kernel/power/process.c                      237
-rw-r--r--  kernel/power/qos.c                          713
-rw-r--r--  kernel/power/snapshot.c                    2722
-rw-r--r--  kernel/power/suspend.c                      536
-rw-r--r--  kernel/power/suspend_test.c                 218
-rw-r--r--  kernel/power/swap.c                        1512
-rw-r--r--  kernel/power/tuxonice.h                     260
-rw-r--r--  kernel/power/tuxonice_alloc.c               308
-rw-r--r--  kernel/power/tuxonice_alloc.h                54
-rw-r--r--  kernel/power/tuxonice_atomic_copy.c         469
-rw-r--r--  kernel/power/tuxonice_atomic_copy.h          25
-rw-r--r--  kernel/power/tuxonice_bio.h                  78
-rw-r--r--  kernel/power/tuxonice_bio_chains.c         1126
-rw-r--r--  kernel/power/tuxonice_bio_core.c           1933
-rw-r--r--  kernel/power/tuxonice_bio_internal.h        101
-rw-r--r--  kernel/power/tuxonice_bio_signature.c       403
-rw-r--r--  kernel/power/tuxonice_builtin.c             498
-rw-r--r--  kernel/power/tuxonice_builtin.h              41
-rw-r--r--  kernel/power/tuxonice_checksum.c            392
-rw-r--r--  kernel/power/tuxonice_checksum.h             31
-rw-r--r--  kernel/power/tuxonice_cluster.c            1058
-rw-r--r--  kernel/power/tuxonice_cluster.h              18
-rw-r--r--  kernel/power/tuxonice_compress.c            452
-rw-r--r--  kernel/power/tuxonice_copy_before_write.c   240
-rw-r--r--  kernel/power/tuxonice_extent.c              144
-rw-r--r--  kernel/power/tuxonice_extent.h               45
-rw-r--r--  kernel/power/tuxonice_file.c                484
-rw-r--r--  kernel/power/tuxonice_highlevel.c          1413
-rw-r--r--  kernel/power/tuxonice_incremental.c         402
-rw-r--r--  kernel/power/tuxonice_io.c                 1932
-rw-r--r--  kernel/power/tuxonice_io.h                   72
-rw-r--r--  kernel/power/tuxonice_modules.c             520
-rw-r--r--  kernel/power/tuxonice_modules.h             212
-rw-r--r--  kernel/power/tuxonice_netlink.c             324
-rw-r--r--  kernel/power/tuxonice_netlink.h              62
-rw-r--r--  kernel/power/tuxonice_pagedir.c             345
-rw-r--r--  kernel/power/tuxonice_pagedir.h              50
-rw-r--r--  kernel/power/tuxonice_pageflags.c            18
-rw-r--r--  kernel/power/tuxonice_pageflags.h           106
-rw-r--r--  kernel/power/tuxonice_power_off.c           286
-rw-r--r--  kernel/power/tuxonice_power_off.h            24
-rw-r--r--  kernel/power/tuxonice_prepare_image.c      1080
-rw-r--r--  kernel/power/tuxonice_prepare_image.h        38
-rw-r--r--  kernel/power/tuxonice_prune.c               406
-rw-r--r--  kernel/power/tuxonice_storage.c             282
-rw-r--r--  kernel/power/tuxonice_storage.h              45
-rw-r--r--  kernel/power/tuxonice_swap.c                474
-rw-r--r--  kernel/power/tuxonice_sysfs.c               333
-rw-r--r--  kernel/power/tuxonice_sysfs.h               137
-rw-r--r--  kernel/power/tuxonice_ui.c                  247
-rw-r--r--  kernel/power/tuxonice_ui.h                   97
-rw-r--r--  kernel/power/tuxonice_userui.c              658
-rw-r--r--  kernel/power/user.c                         478
-rw-r--r--  kernel/power/wakelock.c                     268
63 files changed, 27622 insertions, 0 deletions
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
new file mode 100644
index 000000000..89a46f3ff
--- /dev/null
+++ b/kernel/power/Kconfig
@@ -0,0 +1,581 @@
+config SUSPEND
+ bool "Suspend to RAM and standby"
+ depends on ARCH_SUSPEND_POSSIBLE
+ default y
+ ---help---
+ Allow the system to enter sleep states in which main memory is
+ powered and thus its contents are preserved, such as the
+ suspend-to-RAM state (e.g. the ACPI S3 state).
+
+config SUSPEND_FREEZER
+ bool "Enable freezer for suspend to RAM/standby" \
+ if ARCH_WANTS_FREEZER_CONTROL || BROKEN
+ depends on SUSPEND
+ default y
+ help
+ This allows you to turn off the freezer for suspend. If this is
+ done, no tasks are frozen for suspend to RAM/standby.
+
+ Turning OFF this setting is NOT recommended! If in doubt, say Y.
+
+config HIBERNATE_CALLBACKS
+ bool
+
+config HIBERNATION
+ bool "Hibernation (aka 'suspend to disk')"
+ depends on SWAP && ARCH_HIBERNATION_POSSIBLE
+ select HIBERNATE_CALLBACKS
+ select LZO_COMPRESS
+ select LZO_DECOMPRESS
+ select CRC32
+ ---help---
+ Enable the suspend to disk (STD) functionality, which is usually
+ called "hibernation" in user interfaces. STD checkpoints the
+ system and powers it off; and restores that checkpoint on reboot.
+
+ You can suspend your machine with 'echo disk > /sys/power/state'
+ after placing resume=/dev/swappartition on the kernel command line
+ in your bootloader's configuration file.
+
+ Alternatively, you can use the additional userland tools available
+ from <http://suspend.sf.net>.
+
+ In principle it does not require ACPI or APM, although for example
+ ACPI will be used for the final steps when it is available. One
+ of the reasons to use software suspend is that the firmware hooks
+ for suspend states like suspend-to-RAM (STR) often don't work very
+ well with Linux.
+
+ It creates an image which is saved in your active swap. Upon the next
+ boot, pass the 'resume=/dev/swappartition' argument to the kernel to
+ have it detect the saved image, restore memory state from it, and
+ continue to run as before. If you do not want the previous state to
+ be reloaded, then use the 'noresume' kernel command line argument.
+ Note, however, that fsck will be run on your filesystems and you will
+ need to run mkswap against the swap partition used for the suspend.
+
+ It also works with swap files to a limited extent (for details see
+ <file:Documentation/power/swsusp-and-swap-files.txt>).
+
+ Right now you may boot without resuming and resume later but in the
+ meantime you cannot use the swap partition(s)/file(s) involved in
+ suspending. Also in this case you must not use the filesystems
+ that were mounted before the suspend. In particular, you MUST NOT
+ MOUNT any journaled filesystems mounted before the suspend or they
+ will get corrupted in a nasty way.
+
+ For more information take a look at <file:Documentation/power/swsusp.txt>.
+
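
As a concrete illustration of the 'echo disk > /sys/power/state' workflow described in the help text above, here is a minimal userspace sketch (illustrative only, not part of this patch) that triggers a hibernation cycle; it assumes the kernel was booted with a valid resume= argument and that the caller has root privileges:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Writing "disk" to /sys/power/state starts the hibernation cycle. */
	int fd = open("/sys/power/state", O_WRONLY);

	if (fd < 0) {
		perror("open /sys/power/state");
		return 1;
	}
	if (write(fd, "disk", strlen("disk")) < 0)
		perror("write /sys/power/state");
	close(fd);
	return 0;
}
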
+config ARCH_SAVE_PAGE_KEYS
+ bool
+
+config PM_STD_PARTITION
+ string "Default resume partition"
+ depends on HIBERNATION
+ default ""
+ ---help---
+ The default resume partition is the partition that the suspend-
+ to-disk implementation will search for a suspended disk image.
+
+ The partition specified here will be different for almost every user.
+ It should be a valid swap partition (at least for now) that is turned
+ on before suspending.
+
+ The partition specified can be overridden by specifying:
+
+ resume=/dev/<other device>
+
+ which will set the resume partition to the device specified.
+
+ Note there is currently not a way to specify which device to save the
+ suspended image to. It will simply pick the first available swap
+ device.
+
+menuconfig TOI_CORE
+ bool "Enhanced Hibernation (TuxOnIce)"
+ depends on HIBERNATION
+ default y
+ ---help---
+ TuxOnIce is the 'new and improved' suspend support.
+
+ See the TuxOnIce home page (tuxonice.net)
+ for FAQs, HOWTOs and other documentation.
+
+ comment "Image Storage (you need at least one allocator)"
+ depends on TOI_CORE
+
+ config TOI_FILE
+ bool "File Allocator"
+ depends on TOI_CORE
+ default y
+ ---help---
+ This option enables support for storing an image in a
+ simple file. You might want this if your swap is
+ sometimes full enough that you don't have enough spare
+ space to store an image.
+
+ config TOI_SWAP
+ bool "Swap Allocator"
+ depends on TOI_CORE && SWAP
+ default y
+ ---help---
+ This option enables support for storing an image in your
+ swap space.
+
+ comment "General Options"
+ depends on TOI_CORE
+
+ config TOI_PRUNE
+ bool "Image pruning support"
+ depends on TOI_CORE && CRYPTO && BROKEN
+ default y
+ ---help---
+ This option adds support for using cryptoapi hashing
+ algorithms to identify pages with the same content. We
+ then write a much smaller pointer to the first copy of
+ the data instead of a complete (perhaps compressed)
+ additional copy.
+
+ You probably want this, so say Y here.
+
+ comment "No image pruning support available without Cryptoapi support."
+ depends on TOI_CORE && !CRYPTO
+
+ config TOI_CRYPTO
+ bool "Compression support"
+ depends on TOI_CORE && CRYPTO
+ default y
+ ---help---
+ This option adds support for using cryptoapi compression
+ algorithms. Compression is particularly useful as it can
+ more than double your suspend and resume speed (depending
+ upon how well your image compresses).
+
+ You probably want this, so say Y here.
+
+ comment "No compression support available without Cryptoapi support."
+ depends on TOI_CORE && !CRYPTO
+
+ config TOI_USERUI
+ bool "Userspace User Interface support"
+ depends on TOI_CORE && NET && (VT || SERIAL_CONSOLE)
+ default y
+ ---help---
+ This option enables support for a userspace-based user interface
+ to TuxOnIce, which allows you to have a nice display while suspending
+ and resuming, and also enables features such as pressing escape to
+ cancel a cycle or interactive debugging.
+
+ config TOI_USERUI_DEFAULT_PATH
+ string "Default userui program location"
+ default "/usr/local/sbin/tuxoniceui_text"
+ depends on TOI_USERUI
+ ---help---
+ This entry allows you to specify a default path to the userui binary.
+
+ config TOI_DEFAULT_IMAGE_SIZE_LIMIT
+ int "Default image size limit"
+ range -2 65536
+ default "-2"
+ depends on TOI_CORE
+ ---help---
+ This entry allows you to specify a default image size limit. It can
+ be overridden at run-time using /sys/power/tuxonice/image_size_limit.
+
+ config TOI_KEEP_IMAGE
+ bool "Allow Keep Image Mode"
+ depends on TOI_CORE
+ ---help---
+ This option allows you to keep an image and reuse it. It is intended
+ __ONLY__ for use with systems where all filesystems are mounted read-
+ only (kiosks, for example). To use it, compile this option in and boot
+ normally. Set the KEEP_IMAGE flag in /sys/power/tuxonice and suspend.
+ When you resume, the image will not be removed. You will be unable to turn
+ off swap partitions (assuming you are using the swap allocator), but future
+ suspends simply do a power-down. The image can be updated using the
+ kernel command line parameter suspend_act= to turn off the keep image
+ bit. Keep image mode is a little less user friendly on purpose - it
+ should not be used without thought!
+
+ config TOI_INCREMENTAL
+ bool "Incremental Image Support"
+ depends on TOI_CORE && 64BIT && TOI_KEEP_IMAGE
+ default n
+ ---help---
+ This option enables the work in progress toward using the dirty page
+ tracking to record changes to pages. It is hoped that
+ this will be an initial step toward implementing storing just
+ the differences between consecutive images, which will
+ reduce the amount of storage needed for the image, but also
+ increase the speed at which writing an image occurs and
+ reduce the wear and tear on drives.
+
+ At the moment, all that is implemented is the first step of keeping
+ an existing image and then comparing it to the contents in memory
+ (by setting /sys/power/tuxonice/verify_image to 1 and triggering a
+ (fake) resume) to see what the page change tracking should find to be
+ different. If you have verify_image set to 1, TuxOnIce will automatically
+ invalidate the old image when you next try to hibernate, so there's no
+ greater chance of disk corruption than normal.
+
+ comment "No incremental image support available without Keep Image support."
+ depends on TOI_CORE && !TOI_KEEP_IMAGE && 64BIT
+
+ config TOI_REPLACE_SWSUSP
+ bool "Replace swsusp by default"
+ default y
+ depends on TOI_CORE
+ ---help---
+ TuxOnIce can replace swsusp. This option makes that the default state,
+ requiring you to echo 0 > /sys/power/tuxonice/replace_swsusp if you want
+ to use the vanilla kernel functionality. Note that your initrd/ramfs will
+ need to do this before trying to resume, too.
+ With overriding swsusp enabled, echoing disk to /sys/power/state will
+ start a TuxOnIce cycle. If resume= doesn't specify an allocator and both
+ the swap and file allocators are compiled in, the swap allocator will be
+ used by default.
+
+ config TOI_IGNORE_LATE_INITCALL
+ bool "Wait for initrd/ramfs to run, by default"
+ default n
+ depends on TOI_CORE
+ ---help---
+ When booting, TuxOnIce can check for an image and start to resume prior
+ to any initrd/ramfs running (via a late initcall).
+
+ If you don't have an initrd/ramfs, this is what you want to happen -
+ otherwise you won't be able to safely resume. You should set this option
+ to 'No'.
+
+ If, however, you want your initrd/ramfs to run anyway before resuming,
+ you need to tell TuxOnIce to ignore that earlier opportunity to resume.
+ This can be done either by using this compile time option, or by
+ overriding this option with the boot-time parameter toi_initramfs_resume_only=1.
+
+ Note that if TuxOnIce can't resume at the earlier opportunity, the
+ value of this option won't matter - the initramfs/initrd (if any) will
+ run anyway.
+
+ menuconfig TOI_CLUSTER
+ bool "Cluster support"
+ default n
+ depends on TOI_CORE && NET && BROKEN
+ ---help---
+ Support for linking multiple machines in a cluster so that they suspend
+ and resume together.
+
+ config TOI_DEFAULT_CLUSTER_INTERFACE
+ string "Default cluster interface"
+ depends on TOI_CLUSTER
+ ---help---
+ The default interface on which to communicate with other nodes in
+ the cluster.
+
+ If no value is set here, cluster support will be disabled by default.
+
+ config TOI_DEFAULT_CLUSTER_KEY
+ string "Default cluster key"
+ default "Default"
+ depends on TOI_CLUSTER
+ ---help---
+ The default key used by this node. All nodes in the same cluster
+ have the same key. Multiple clusters may coexist on the same lan
+ by using different values for this key.
+
+ config TOI_CLUSTER_IMAGE_TIMEOUT
+ int "Timeout when checking for image"
+ default 15
+ depends on TOI_CLUSTER
+ ---help---
+ Timeout (seconds) before continuing to boot when waiting to see
+ whether other nodes might have an image. Set to -1 to wait
+ indefinitely. If WAIT_UNTIL_NODES is non-zero, we might continue
+ booting sooner than this timeout.
+
+ config TOI_CLUSTER_WAIT_UNTIL_NODES
+ int "Nodes without image before continuing"
+ default 0
+ depends on TOI_CLUSTER
+ ---help---
+ When booting and no image is found, we wait to see if other nodes
+ have an image before continuing to boot. This value lets us
+ continue after seeing a certain number of nodes without an image,
+ instead of continuing to wait for the timeout. Set to 0 to only
+ use the timeout.
+
+ config TOI_DEFAULT_CLUSTER_PRE_HIBERNATE
+ string "Default pre-hibernate script"
+ depends on TOI_CLUSTER
+ ---help---
+ The default script to be called when starting to hibernate.
+
+ config TOI_DEFAULT_CLUSTER_POST_HIBERNATE
+ string "Default post-hibernate script"
+ depends on TOI_CLUSTER
+ ---help---
+ The default script to be called after resuming from hibernation.
+
+ config TOI_DEFAULT_WAIT
+ int "Default waiting time for emergency boot messages"
+ default "25"
+ range -1 32768
+ depends on TOI_CORE
+ help
+ TuxOnIce can display warnings very early in the process of resuming,
+ if (for example) it appears that you have booted a kernel that doesn't
+ match an image on disk. It can then give you the opportunity to either
+ continue booting that kernel, or reboot the machine. This option can be
+ used to control how long to wait in such circumstances. -1 means wait
+ forever. 0 means don't wait at all (do the default action, which will
+ generally be to continue booting and remove the image). Values of 1 or
+ more indicate a number of seconds (up to 255) to wait before doing the
+ default.
+
+ config TOI_DEFAULT_EXTRA_PAGES_ALLOWANCE
+ int "Default extra pages allowance"
+ default "2000"
+ range 500 32768
+ depends on TOI_CORE
+ help
+ This value controls the default for the allowance TuxOnIce makes for
+ drivers to allocate extra memory during the atomic copy. The default
+ value of 2000 will be okay in most cases. If you are using
+ DRI, the easiest way to find what value to use is to try to hibernate
+ and look at how many pages were actually needed in the sysfs entry
+ /sys/power/tuxonice/debug_info (first number on the last line), adding
+ a little extra because the value is not always the same.
+
+ config TOI_CHECKSUM
+ bool "Checksum pageset2"
+ default n
+ depends on TOI_CORE
+ select CRYPTO
+ select CRYPTO_ALGAPI
+ select CRYPTO_MD4
+ ---help---
+ Adds support for checksumming pageset2 pages, to ensure you really get an
+ atomic copy. Since some filesystems (XFS especially) change metadata even
+ when there's no other activity, we need this to check for pages that have
+ been changed while we were saving the page cache. If your debugging output
+ always says no pages were resaved, you may be able to safely disable this
+ option.
+
+config TOI
+ bool
+ depends on TOI_CORE!=n
+ default y
+
+config TOI_ZRAM_SUPPORT
+ def_bool y
+ depends on TOI && ZRAM!=n
+
+config PM_SLEEP
+ def_bool y
+ depends on SUSPEND || HIBERNATE_CALLBACKS
+ select PM
+
+config PM_SLEEP_SMP
+ def_bool y
+ depends on SMP
+ depends on ARCH_SUSPEND_POSSIBLE || ARCH_HIBERNATION_POSSIBLE
+ depends on PM_SLEEP
+ select HOTPLUG_CPU
+
+config PM_AUTOSLEEP
+ bool "Opportunistic sleep"
+ depends on PM_SLEEP
+ default n
+ ---help---
+ Allow the kernel to trigger a system transition into a global sleep
+ state automatically whenever there are no active wakeup sources.
+
+config PM_WAKELOCKS
+ bool "User space wakeup sources interface"
+ depends on PM_SLEEP
+ default n
+ ---help---
+ Allow user space to create, activate and deactivate wakeup source
+ objects with the help of a sysfs-based interface.
+
+config PM_WAKELOCKS_LIMIT
+ int "Maximum number of user space wakeup sources (0 = no limit)"
+ range 0 100000
+ default 100
+ depends on PM_WAKELOCKS
+
+config PM_WAKELOCKS_GC
+ bool "Garbage collector for user space wakeup sources"
+ depends on PM_WAKELOCKS
+ default y
+
+config PM
+ bool "Device power management core functionality"
+ ---help---
+ Enable functionality allowing I/O devices to be put into energy-saving
+ (low power) states, for example after a specified period of inactivity
+ (autosuspended), and woken up in response to a hardware-generated
+ wake-up event or a driver's request.
+
+ Hardware support is generally required for this functionality to work
+ and the bus type drivers of the buses the devices are on are
+ responsible for the actual handling of device suspend requests and
+ wake-up events.
+
+config PM_DEBUG
+ bool "Power Management Debug Support"
+ depends on PM
+ ---help---
+ This option enables various debugging support in the Power Management
+ code. This is helpful when debugging and reporting PM bugs, like
+ suspend support.
+
+config PM_ADVANCED_DEBUG
+ bool "Extra PM attributes in sysfs for low-level debugging/testing"
+ depends on PM_DEBUG
+ ---help---
+ Add extra sysfs attributes allowing one to access some Power Management
+ fields of device objects from user space. If you are not a kernel
+ developer interested in debugging/testing Power Management, say "no".
+
+config PM_TEST_SUSPEND
+ bool "Test suspend/resume and wakealarm during bootup"
+ depends on SUSPEND && PM_DEBUG && RTC_CLASS=y
+ ---help---
+ This option will let you suspend your machine during bootup, and
+ make it wake up a few seconds later using an RTC wakeup alarm.
+ Enable this with a kernel parameter like "test_suspend=mem".
+
+ You probably want to have your system's RTC driver statically
+ linked, ensuring that it's available when this test runs.
+
+config PM_SLEEP_DEBUG
+ def_bool y
+ depends on PM_DEBUG && PM_SLEEP
+
+config DPM_WATCHDOG
+ bool "Device suspend/resume watchdog"
+ depends on PM_DEBUG && PSTORE
+ ---help---
+ Sets up a watchdog timer to capture drivers that are
+ locked up attempting to suspend/resume a device.
+ A detected lockup causes system panic with message
+ captured in pstore device for inspection in subsequent
+ boot session.
+
+config DPM_WATCHDOG_TIMEOUT
+ int "Watchdog timeout in seconds"
+ range 1 120
+ default 60
+ depends on DPM_WATCHDOG
+
+config PM_TRACE
+ bool
+ help
+ This enables code to save the last PM event point across
+ reboot. The architecture needs to support this, x86 for
+ example does by saving things in the RTC, see below.
+
+ The architecture specific code must provide the extern
+ functions from <linux/resume-trace.h> as well as the
+ <asm/resume-trace.h> header with a TRACE_RESUME() macro.
+
+ The way the information is presented is architecture-
+ dependent, x86 will print the information during a
+ late_initcall.
+
+config PM_TRACE_RTC
+ bool "Suspend/resume event tracing"
+ depends on PM_SLEEP_DEBUG
+ depends on X86
+ select PM_TRACE
+ ---help---
+ This enables some cheesy code to save the last PM event point in the
+ RTC across reboots, so that you can debug a machine that just hangs
+ during suspend (or more commonly, during resume).
+
+ To use this debugging feature you should attempt to suspend the
+ machine, reboot it and then run
+
+ dmesg -s 1000000 | grep 'hash matches'
+
+ CAUTION: this option will cause your machine's real-time clock to be
+ set to an invalid time after a resume.
+
+config APM_EMULATION
+ tristate "Advanced Power Management Emulation"
+ depends on PM && SYS_SUPPORTS_APM_EMULATION
+ help
+ APM is a BIOS specification for saving power using several different
+ techniques. This is mostly useful for battery powered laptops with
+ APM compliant BIOSes. If you say Y here, the system time will be
+ reset after a RESUME operation, the /proc/apm device will provide
+ battery status information, and user-space programs will receive
+ notification of APM "events" (e.g. battery status change).
+
+ In order to use APM, you will need supporting software. For location
+ and more information, read <file:Documentation/power/apm-acpi.txt>
+ and the Battery Powered Linux mini-HOWTO, available from
+ <http://www.tldp.org/docs.html#howto>.
+
+ This driver does not spin down disk drives (see the hdparm(8)
+ manpage ("man 8 hdparm") for that), and it doesn't turn off
+ VESA-compliant "green" monitors.
+
+ Generally, if you don't have a battery in your machine, there isn't
+ much point in using this driver and you should say N. If you get
+ random kernel OOPSes or reboots that don't seem to be related to
+ anything, try disabling/enabling this option (or disabling/enabling
+ APM in your BIOS).
+
+config PM_OPP
+ bool
+ select SRCU
+ ---help---
+ SOCs have a standard set of tuples consisting of frequency and
+ voltage pairs that the device will support per voltage domain. This
+ is called Operating Performance Point or OPP. The actual definitions
+ of OPP varies over silicon within the same family of devices.
+
+ OPP layer organizes the data internally using device pointers
+ representing individual voltage domains and provides SOC
+ implementations a ready to use framework to manage OPPs.
+ For more information, read <file:Documentation/power/opp.txt>
+
+config PM_CLK
+ def_bool y
+ depends on PM && HAVE_CLK
+
+config PM_GENERIC_DOMAINS
+ bool
+ depends on PM
+
+config WQ_POWER_EFFICIENT_DEFAULT
+ bool "Enable workqueue power-efficient mode by default"
+ depends on PM
+ default n
+ help
+ Per-cpu workqueues are generally preferred because they show
+ better performance thanks to cache locality; unfortunately,
+ per-cpu workqueues tend to be more power hungry than unbound
+ workqueues.
+
+ Enabling workqueue.power_efficient kernel parameter makes the
+ per-cpu workqueues which were observed to contribute
+ significantly to power consumption unbound, leading to measurably
+ lower power usage at the cost of small performance overhead.
+
+ This config option determines whether workqueue.power_efficient
+ is enabled by default.
+
+ If in doubt, say N.
+
+config PM_GENERIC_DOMAINS_SLEEP
+ def_bool y
+ depends on PM_SLEEP && PM_GENERIC_DOMAINS
+
+config PM_GENERIC_DOMAINS_OF
+ def_bool y
+ depends on PM_GENERIC_DOMAINS && OF
+
+config CPU_PM
+ bool
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
new file mode 100644
index 000000000..b8d7b68f7
--- /dev/null
+++ b/kernel/power/Makefile
@@ -0,0 +1,47 @@
+
+ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG
+
+tuxonice_core-y := tuxonice_modules.o
+
+obj-$(CONFIG_TOI) += tuxonice_builtin.o
+obj-$(CONFIG_TOI_INCREMENTAL) += tuxonice_incremental.o \
+ tuxonice_copy_before_write.o
+
+tuxonice_core-$(CONFIG_PM_DEBUG) += tuxonice_alloc.o
+
+# Compile these in after allocation debugging, if used.
+
+tuxonice_core-y += tuxonice_sysfs.o tuxonice_highlevel.o \
+ tuxonice_io.o tuxonice_pagedir.o tuxonice_prepare_image.o \
+ tuxonice_extent.o tuxonice_pageflags.o tuxonice_ui.o \
+ tuxonice_power_off.o tuxonice_atomic_copy.o
+
+tuxonice_core-$(CONFIG_TOI_CHECKSUM) += tuxonice_checksum.o
+
+tuxonice_core-$(CONFIG_NET) += tuxonice_storage.o tuxonice_netlink.o
+
+obj-$(CONFIG_TOI_CORE) += tuxonice_core.o
+obj-$(CONFIG_TOI_PRUNE) += tuxonice_prune.o
+obj-$(CONFIG_TOI_CRYPTO) += tuxonice_compress.o
+
+tuxonice_bio-y := tuxonice_bio_core.o tuxonice_bio_chains.o \
+ tuxonice_bio_signature.o
+
+obj-$(CONFIG_TOI_SWAP) += tuxonice_bio.o tuxonice_swap.o
+obj-$(CONFIG_TOI_FILE) += tuxonice_bio.o tuxonice_file.o
+obj-$(CONFIG_TOI_CLUSTER) += tuxonice_cluster.o
+
+obj-$(CONFIG_TOI_USERUI) += tuxonice_userui.o
+
+obj-y += qos.o
+obj-$(CONFIG_PM) += main.o
+obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o
+obj-$(CONFIG_FREEZER) += process.o
+obj-$(CONFIG_SUSPEND) += suspend.o
+obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o
+obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \
+ block_io.o
+obj-$(CONFIG_PM_AUTOSLEEP) += autosleep.o
+obj-$(CONFIG_PM_WAKELOCKS) += wakelock.o
+
+obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c
new file mode 100644
index 000000000..9012ecf7b
--- /dev/null
+++ b/kernel/power/autosleep.c
@@ -0,0 +1,128 @@
+/*
+ * kernel/power/autosleep.c
+ *
+ * Opportunistic sleep support.
+ *
+ * Copyright (C) 2012 Rafael J. Wysocki <rjw@sisk.pl>
+ */
+
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include <linux/pm_wakeup.h>
+
+#include "power.h"
+
+static suspend_state_t autosleep_state;
+static struct workqueue_struct *autosleep_wq;
+/*
+ * Note: it is only safe to mutex_lock(&autosleep_lock) if a wakeup_source
+ * is active, otherwise a deadlock with try_to_suspend() is possible.
+ * Alternatively mutex_lock_interruptible() can be used. This will then fail
+ * if an auto_sleep cycle tries to freeze processes.
+ */
+static DEFINE_MUTEX(autosleep_lock);
+static struct wakeup_source *autosleep_ws;
+
+static void try_to_suspend(struct work_struct *work)
+{
+ unsigned int initial_count, final_count;
+
+ if (!pm_get_wakeup_count(&initial_count, true))
+ goto out;
+
+ mutex_lock(&autosleep_lock);
+
+ if (!pm_save_wakeup_count(initial_count) ||
+ system_state != SYSTEM_RUNNING) {
+ mutex_unlock(&autosleep_lock);
+ goto out;
+ }
+
+ if (autosleep_state == PM_SUSPEND_ON) {
+ mutex_unlock(&autosleep_lock);
+ return;
+ }
+ if (autosleep_state >= PM_SUSPEND_MAX)
+ hibernate();
+ else
+ pm_suspend(autosleep_state);
+
+ mutex_unlock(&autosleep_lock);
+
+ if (!pm_get_wakeup_count(&final_count, false))
+ goto out;
+
+ /*
+ * If the wakeup occurred for an unknown reason, wait to prevent the
+ * system from trying to suspend and waking up in a tight loop.
+ */
+ if (final_count == initial_count)
+ schedule_timeout_uninterruptible(HZ / 2);
+
+ out:
+ queue_up_suspend_work();
+}
+
+static DECLARE_WORK(suspend_work, try_to_suspend);
+
+void queue_up_suspend_work(void)
+{
+ if (autosleep_state > PM_SUSPEND_ON)
+ queue_work(autosleep_wq, &suspend_work);
+}
+
+suspend_state_t pm_autosleep_state(void)
+{
+ return autosleep_state;
+}
+
+int pm_autosleep_lock(void)
+{
+ return mutex_lock_interruptible(&autosleep_lock);
+}
+
+void pm_autosleep_unlock(void)
+{
+ mutex_unlock(&autosleep_lock);
+}
+
+int pm_autosleep_set_state(suspend_state_t state)
+{
+
+#ifndef CONFIG_HIBERNATION
+ if (state >= PM_SUSPEND_MAX)
+ return -EINVAL;
+#endif
+
+ __pm_stay_awake(autosleep_ws);
+
+ mutex_lock(&autosleep_lock);
+
+ autosleep_state = state;
+
+ __pm_relax(autosleep_ws);
+
+ if (state > PM_SUSPEND_ON) {
+ pm_wakep_autosleep_enabled(true);
+ queue_up_suspend_work();
+ } else {
+ pm_wakep_autosleep_enabled(false);
+ }
+
+ mutex_unlock(&autosleep_lock);
+ return 0;
+}
+
+int __init pm_autosleep_init(void)
+{
+ autosleep_ws = wakeup_source_register("autosleep");
+ if (!autosleep_ws)
+ return -ENOMEM;
+
+ autosleep_wq = alloc_ordered_workqueue("autosleep", 0);
+ if (autosleep_wq)
+ return 0;
+
+ wakeup_source_unregister(autosleep_ws);
+ return -ENOMEM;
+}
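
The locking comment above autosleep_lock prescribes how code outside this file should take the lock: either while a wakeup source is active, or via the interruptible wrapper pm_autosleep_lock(). A minimal sketch of that pattern, modelled on how the sysfs wake-lock handlers use it (the function name is hypothetical):

/* Sketch: serialize an update against an in-flight autosleep attempt. */
static ssize_t example_store(const char *buf, size_t n)
{
	/* May return -EINTR if an autosleep cycle is freezing processes. */
	int error = pm_autosleep_lock();

	if (error)
		return error;

	/* ... update wakeup-source state here while autosleep is held off ... */

	pm_autosleep_unlock();
	return n;
}
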
diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c
new file mode 100644
index 000000000..9a58bc258
--- /dev/null
+++ b/kernel/power/block_io.c
@@ -0,0 +1,103 @@
+/*
+ * This file provides functions for block I/O operations on swap/file.
+ *
+ * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz>
+ * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/bio.h>
+#include <linux/kernel.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+
+#include "power.h"
+
+/**
+ * submit - submit BIO request.
+ * @rw: READ or WRITE.
+ * @bdev: block device to which the page is submitted.
+ * @sector: physical offset (in sectors) of the page on @bdev.
+ * @page: page we're reading or writing.
+ * @bio_chain: list of pending bios (for async reading)
+ *
+ * Straight from the textbook - allocate and initialize the bio.
+ * If we're reading, make sure the page is marked as dirty.
+ * Then submit it and, if @bio_chain == NULL, wait.
+ */
+static int submit(int rw, struct block_device *bdev, sector_t sector,
+ struct page *page, struct bio **bio_chain)
+{
+ const int bio_rw = rw | REQ_SYNC;
+ struct bio *bio;
+
+ bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
+ bio->bi_iter.bi_sector = sector;
+ bio->bi_bdev = bdev;
+ bio->bi_end_io = end_swap_bio_read;
+
+ if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
+ printk(KERN_ERR "PM: Adding page to bio failed at %llu\n",
+ (unsigned long long)sector);
+ bio_put(bio);
+ return -EFAULT;
+ }
+
+ lock_page(page);
+ bio_get(bio);
+
+ if (bio_chain == NULL) {
+ submit_bio(bio_rw, bio);
+ wait_on_page_locked(page);
+ if (rw == READ)
+ bio_set_pages_dirty(bio);
+ bio_put(bio);
+ } else {
+ if (rw == READ)
+ get_page(page); /* These pages are freed later */
+ bio->bi_private = *bio_chain;
+ *bio_chain = bio;
+ submit_bio(bio_rw, bio);
+ }
+ return 0;
+}
+
+int hib_bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
+{
+ return submit(READ, hib_resume_bdev, page_off * (PAGE_SIZE >> 9),
+ virt_to_page(addr), bio_chain);
+}
+
+int hib_bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
+{
+ return submit(WRITE, hib_resume_bdev, page_off * (PAGE_SIZE >> 9),
+ virt_to_page(addr), bio_chain);
+}
+
+int hib_wait_on_bio_chain(struct bio **bio_chain)
+{
+ struct bio *bio;
+ struct bio *next_bio;
+ int ret = 0;
+
+ if (bio_chain == NULL)
+ return 0;
+
+ bio = *bio_chain;
+ if (bio == NULL)
+ return 0;
+ while (bio) {
+ struct page *page;
+
+ next_bio = bio->bi_private;
+ page = bio->bi_io_vec[0].bv_page;
+ wait_on_page_locked(page);
+ if (!PageUptodate(page) || PageError(page))
+ ret = -EIO;
+ put_page(page);
+ bio_put(bio);
+ bio = next_bio;
+ }
+ *bio_chain = NULL;
+ return ret;
+}
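
The @bio_chain argument documented above selects between synchronous and asynchronous I/O: passing NULL waits for each page, while passing a chain pointer queues the bios for a later hib_wait_on_bio_chain(). A hedged sketch of the asynchronous pattern (the helper name and buffer handling are illustrative):

/* Sketch: queue several page reads asynchronously, then wait for all of them. */
static int example_read_pages(pgoff_t first, void **bufs, unsigned int nr)
{
	struct bio *bio_chain = NULL;	/* collects the in-flight bios */
	unsigned int i;
	int error = 0;

	for (i = 0; i < nr && !error; i++)
		error = hib_bio_read_page(first + i, bufs[i], &bio_chain);

	/* Wait for every queued bio; fold any I/O error into the result. */
	if (!error)
		error = hib_wait_on_bio_chain(&bio_chain);
	else
		hib_wait_on_bio_chain(&bio_chain);

	return error;
}
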
diff --git a/kernel/power/console.c b/kernel/power/console.c
new file mode 100644
index 000000000..aba9c545a
--- /dev/null
+++ b/kernel/power/console.c
@@ -0,0 +1,151 @@
+/*
+ * Functions for saving/restoring console.
+ *
+ * Originally from swsusp.
+ */
+
+#include <linux/console.h>
+#include <linux/vt_kern.h>
+#include <linux/kbd_kern.h>
+#include <linux/vt.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include "power.h"
+
+#define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1)
+
+static int orig_fgconsole, orig_kmsg;
+
+static DEFINE_MUTEX(vt_switch_mutex);
+
+struct pm_vt_switch {
+ struct list_head head;
+ struct device *dev;
+ bool required;
+};
+
+static LIST_HEAD(pm_vt_switch_list);
+
+
+/**
+ * pm_vt_switch_required - indicate VT switch at suspend requirements
+ * @dev: device
+ * @required: if true, caller needs VT switch at suspend/resume time
+ *
+ * The different console drivers may or may not require VT switches across
+ * suspend/resume, depending on how they handle restoring video state and
+ * what may be running.
+ *
+ * Drivers can indicate support for switchless suspend/resume, which can
+ * save time and flicker, by using this routine and passing 'false' as
+ * the argument. If any loaded driver needs VT switching, or the
+ * no_console_suspend argument has been passed on the command line, VT
+ * switches will occur.
+ */
+void pm_vt_switch_required(struct device *dev, bool required)
+{
+ struct pm_vt_switch *entry, *tmp;
+
+ mutex_lock(&vt_switch_mutex);
+ list_for_each_entry(tmp, &pm_vt_switch_list, head) {
+ if (tmp->dev == dev) {
+ /* already registered, update requirement */
+ tmp->required = required;
+ goto out;
+ }
+ }
+
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ goto out;
+
+ entry->required = required;
+ entry->dev = dev;
+
+ list_add(&entry->head, &pm_vt_switch_list);
+out:
+ mutex_unlock(&vt_switch_mutex);
+}
+EXPORT_SYMBOL(pm_vt_switch_required);
+
+/**
+ * pm_vt_switch_unregister - stop tracking a device's VT switching needs
+ * @dev: device
+ *
+ * Remove @dev from the vt switch list.
+ */
+void pm_vt_switch_unregister(struct device *dev)
+{
+ struct pm_vt_switch *tmp;
+
+ mutex_lock(&vt_switch_mutex);
+ list_for_each_entry(tmp, &pm_vt_switch_list, head) {
+ if (tmp->dev == dev) {
+ list_del(&tmp->head);
+ kfree(tmp);
+ break;
+ }
+ }
+ mutex_unlock(&vt_switch_mutex);
+}
+EXPORT_SYMBOL(pm_vt_switch_unregister);
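
A hedged sketch of how a video driver might use the two calls above: declare at probe time that it restores its own state and so needs no VT switch, and drop the registration on removal (the driver callbacks are hypothetical):

/* Sketch of a driver that restores video state itself across suspend/resume. */
static int example_video_probe(struct device *dev)
{
	/* 'false': this device does not require a VT switch at suspend time. */
	pm_vt_switch_required(dev, false);
	return 0;
}

static void example_video_remove(struct device *dev)
{
	/* Stop tracking this device's VT switch preference. */
	pm_vt_switch_unregister(dev);
}
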
+
+/*
+ * There are three cases when a VT switch on suspend/resume are required:
+ * 1) no driver has indicated a requirement one way or another, so preserve
+ * the old behavior
+ * 2) console suspend is disabled, we want to see debug messages across
+ * suspend/resume
+ * 3) any registered driver indicates it needs a VT switch
+ *
+ * If none of these conditions is present, meaning we have at least one driver
+ * that doesn't need the switch, and none that do, we can avoid it to make
+ * resume look a little prettier (and suspend too, but that's usually hidden,
+ * e.g. when closing the lid on a laptop).
+ */
+static bool pm_vt_switch(void)
+{
+ struct pm_vt_switch *entry;
+ bool ret = true;
+
+ mutex_lock(&vt_switch_mutex);
+ if (list_empty(&pm_vt_switch_list))
+ goto out;
+
+ if (!console_suspend_enabled)
+ goto out;
+
+ list_for_each_entry(entry, &pm_vt_switch_list, head) {
+ if (entry->required)
+ goto out;
+ }
+
+ ret = false;
+out:
+ mutex_unlock(&vt_switch_mutex);
+ return ret;
+}
+
+int pm_prepare_console(void)
+{
+ if (!pm_vt_switch())
+ return 0;
+
+ orig_fgconsole = vt_move_to_console(SUSPEND_CONSOLE, 1);
+ if (orig_fgconsole < 0)
+ return 1;
+
+ orig_kmsg = vt_kmsg_redirect(SUSPEND_CONSOLE);
+ return 0;
+}
+
+void pm_restore_console(void)
+{
+ if (!pm_vt_switch())
+ return;
+
+ if (orig_fgconsole >= 0) {
+ vt_move_to_console(orig_fgconsole, 0);
+ vt_kmsg_redirect(orig_kmsg);
+ }
+}
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
new file mode 100644
index 000000000..b7d3bc724
--- /dev/null
+++ b/kernel/power/hibernate.c
@@ -0,0 +1,1178 @@
+/*
+ * kernel/power/hibernate.c - Hibernation (a.k.a suspend-to-disk) support.
+ *
+ * Copyright (c) 2003 Patrick Mochel
+ * Copyright (c) 2003 Open Source Development Lab
+ * Copyright (c) 2004 Pavel Machek <pavel@ucw.cz>
+ * Copyright (c) 2009 Rafael J. Wysocki, Novell Inc.
+ * Copyright (C) 2012 Bojan Smojver <bojan@rexursive.com>
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/export.h>
+#include <linux/suspend.h>
+#include <linux/syscalls.h>
+#include <linux/reboot.h>
+#include <linux/string.h>
+#include <linux/device.h>
+#include <linux/async.h>
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/pm.h>
+#include <linux/console.h>
+#include <linux/cpu.h>
+#include <linux/freezer.h>
+#include <linux/gfp.h>
+#include <linux/syscore_ops.h>
+#include <linux/ctype.h>
+#include <linux/genhd.h>
+#include <linux/ktime.h>
+#include <trace/events/power.h>
+
+#include "tuxonice.h"
+
+
+static int nocompress;
+static int noresume;
+static int nohibernate;
+static int resume_wait;
+static unsigned int resume_delay;
+char resume_file[256] = CONFIG_PM_STD_PARTITION;
+dev_t swsusp_resume_device;
+sector_t swsusp_resume_block;
+__visible int in_suspend __nosavedata;
+
+enum {
+ HIBERNATION_INVALID,
+ HIBERNATION_PLATFORM,
+ HIBERNATION_SHUTDOWN,
+ HIBERNATION_REBOOT,
+#ifdef CONFIG_SUSPEND
+ HIBERNATION_SUSPEND,
+#endif
+ /* keep last */
+ __HIBERNATION_AFTER_LAST
+};
+#define HIBERNATION_MAX (__HIBERNATION_AFTER_LAST-1)
+#define HIBERNATION_FIRST (HIBERNATION_INVALID + 1)
+
+static int hibernation_mode = HIBERNATION_SHUTDOWN;
+
+bool freezer_test_done;
+
+static const struct platform_hibernation_ops *hibernation_ops;
+
+bool hibernation_available(void)
+{
+ return (nohibernate == 0);
+}
+
+/**
+ * hibernation_set_ops - Set the global hibernate operations.
+ * @ops: Hibernation operations to use in subsequent hibernation transitions.
+ */
+void hibernation_set_ops(const struct platform_hibernation_ops *ops)
+{
+ if (ops && !(ops->begin && ops->end && ops->pre_snapshot
+ && ops->prepare && ops->finish && ops->enter && ops->pre_restore
+ && ops->restore_cleanup && ops->leave)) {
+ WARN_ON(1);
+ return;
+ }
+ lock_system_sleep();
+ hibernation_ops = ops;
+ if (ops)
+ hibernation_mode = HIBERNATION_PLATFORM;
+ else if (hibernation_mode == HIBERNATION_PLATFORM)
+ hibernation_mode = HIBERNATION_SHUTDOWN;
+
+ unlock_system_sleep();
+}
+EXPORT_SYMBOL_GPL(hibernation_set_ops);
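
hibernation_set_ops() rejects an ops structure unless every callback it checks is populated. A hedged sketch of what a platform driver's registration could look like (the stub callbacks only illustrate the prototypes; a real driver, such as the ACPI one, does the firmware work in these hooks):

/* Sketch: all callbacks checked by hibernation_set_ops() must be non-NULL. */
static int  example_begin(void)           { return 0; }  /* hibernation cycle starts  */
static void example_end(void)             { }            /* cycle has finished        */
static int  example_pre_snapshot(void)    { return 0; }  /* before the atomic copy    */
static void example_finish(void)          { }            /* back to the working state */
static int  example_prepare(void)         { return 0; }  /* before powering off       */
static int  example_enter(void)           { return 0; }  /* enter the sleep state     */
static void example_leave(void)           { }            /* after the atomic restore  */
static int  example_pre_restore(void)     { return 0; }  /* before loading the image  */
static void example_restore_cleanup(void) { }            /* after a failed restore    */

static const struct platform_hibernation_ops example_hibernation_ops = {
	.begin           = example_begin,
	.end             = example_end,
	.pre_snapshot    = example_pre_snapshot,
	.finish          = example_finish,
	.prepare         = example_prepare,
	.enter           = example_enter,
	.leave           = example_leave,
	.pre_restore     = example_pre_restore,
	.restore_cleanup = example_restore_cleanup,
	/* .recover is optional; it is only called after a failed device suspend. */
};

static int __init example_pm_init(void)
{
	hibernation_set_ops(&example_hibernation_ops);
	return 0;
}
late_initcall(example_pm_init);
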
+
+static bool entering_platform_hibernation;
+
+bool system_entering_hibernation(void)
+{
+ return entering_platform_hibernation;
+}
+EXPORT_SYMBOL(system_entering_hibernation);
+
+#ifdef CONFIG_PM_DEBUG
+static void hibernation_debug_sleep(void)
+{
+ printk(KERN_INFO "hibernation debug: Waiting for 5 seconds.\n");
+ mdelay(5000);
+}
+
+static int hibernation_test(int level)
+{
+ if (pm_test_level == level) {
+ hibernation_debug_sleep();
+ return 1;
+ }
+ return 0;
+}
+#else /* !CONFIG_PM_DEBUG */
+static int hibernation_test(int level) { return 0; }
+#endif /* !CONFIG_PM_DEBUG */
+
+/**
+ * platform_begin - Call platform to start hibernation.
+ * @platform_mode: Whether or not to use the platform driver.
+ */
+int platform_begin(int platform_mode)
+{
+ return (platform_mode && hibernation_ops) ?
+ hibernation_ops->begin() : 0;
+}
+
+/**
+ * platform_end - Call platform to finish transition to the working state.
+ * @platform_mode: Whether or not to use the platform driver.
+ */
+void platform_end(int platform_mode)
+{
+ if (platform_mode && hibernation_ops)
+ hibernation_ops->end();
+}
+
+/**
+ * platform_pre_snapshot - Call platform to prepare the machine for hibernation.
+ * @platform_mode: Whether or not to use the platform driver.
+ *
+ * Use the platform driver to prepare the system for creating a hibernate image,
+ * if so configured, and return an error code if that fails.
+ */
+
+int platform_pre_snapshot(int platform_mode)
+{
+ return (platform_mode && hibernation_ops) ?
+ hibernation_ops->pre_snapshot() : 0;
+}
+
+/**
+ * platform_leave - Call platform to prepare a transition to the working state.
+ * @platform_mode: Whether or not to use the platform driver.
+ *
+ * Use the platform driver to prepare the machine for switching back to the
+ * normal mode of operation.
+ *
+ * This routine is called on one CPU with interrupts disabled.
+ */
+void platform_leave(int platform_mode)
+{
+ if (platform_mode && hibernation_ops)
+ hibernation_ops->leave();
+}
+
+/**
+ * platform_finish - Call platform to switch the system to the working state.
+ * @platform_mode: Whether or not to use the platform driver.
+ *
+ * Use the platform driver to switch the machine to the normal mode of
+ * operation.
+ *
+ * This routine must be called after platform_prepare().
+ */
+void platform_finish(int platform_mode)
+{
+ if (platform_mode && hibernation_ops)
+ hibernation_ops->finish();
+}
+
+/**
+ * platform_pre_restore - Prepare for hibernate image restoration.
+ * @platform_mode: Whether or not to use the platform driver.
+ *
+ * Use the platform driver to prepare the system for resume from a hibernation
+ * image.
+ *
+ * If the restore fails after this function has been called,
+ * platform_restore_cleanup() must be called.
+ */
+int platform_pre_restore(int platform_mode)
+{
+ return (platform_mode && hibernation_ops) ?
+ hibernation_ops->pre_restore() : 0;
+}
+
+/**
+ * platform_restore_cleanup - Switch to the working state after failing restore.
+ * @platform_mode: Whether or not to use the platform driver.
+ *
+ * Use the platform driver to switch the system to the normal mode of operation
+ * after a failing restore.
+ *
+ * If platform_pre_restore() has been called before the failing restore, this
+ * function must be called too, regardless of the result of
+ * platform_pre_restore().
+ */
+void platform_restore_cleanup(int platform_mode)
+{
+ if (platform_mode && hibernation_ops)
+ hibernation_ops->restore_cleanup();
+}
+
+/**
+ * platform_recover - Recover from a failure to suspend devices.
+ * @platform_mode: Whether or not to use the platform driver.
+ */
+void platform_recover(int platform_mode)
+{
+ if (platform_mode && hibernation_ops && hibernation_ops->recover)
+ hibernation_ops->recover();
+}
+
+/**
+ * swsusp_show_speed - Print time elapsed between two events during hibernation.
+ * @start: Starting event.
+ * @stop: Final event.
+ * @nr_pages: Number of memory pages processed between @start and @stop.
+ * @msg: Additional diagnostic message to print.
+ */
+void swsusp_show_speed(ktime_t start, ktime_t stop,
+ unsigned nr_pages, char *msg)
+{
+ ktime_t diff;
+ u64 elapsed_centisecs64;
+ unsigned int centisecs;
+ unsigned int k;
+ unsigned int kps;
+
+ diff = ktime_sub(stop, start);
+ elapsed_centisecs64 = ktime_divns(diff, 10*NSEC_PER_MSEC);
+ centisecs = elapsed_centisecs64;
+ if (centisecs == 0)
+ centisecs = 1; /* avoid div-by-zero */
+ k = nr_pages * (PAGE_SIZE / 1024);
+ kps = (k * 100) / centisecs;
+ printk(KERN_INFO "PM: %s %u kbytes in %u.%02u seconds (%u.%02u MB/s)\n",
+ msg, k,
+ centisecs / 100, centisecs % 100,
+ kps / 1000, (kps % 1000) / 10);
+}
+
+/**
+ * create_image - Create a hibernation image.
+ * @platform_mode: Whether or not to use the platform driver.
+ *
+ * Execute device drivers' "late" and "noirq" freeze callbacks, create a
+ * hibernation image and run the drivers' "noirq" and "early" thaw callbacks.
+ *
+ * Control reappears in this routine after the subsequent restore.
+ */
+static int create_image(int platform_mode)
+{
+ int error;
+
+ error = dpm_suspend_end(PMSG_FREEZE);
+ if (error) {
+ printk(KERN_ERR "PM: Some devices failed to power down, "
+ "aborting hibernation\n");
+ return error;
+ }
+
+ error = platform_pre_snapshot(platform_mode);
+ if (error || hibernation_test(TEST_PLATFORM))
+ goto Platform_finish;
+
+ error = disable_nonboot_cpus();
+ if (error || hibernation_test(TEST_CPUS))
+ goto Enable_cpus;
+
+ local_irq_disable();
+
+ error = syscore_suspend();
+ if (error) {
+ printk(KERN_ERR "PM: Some system devices failed to power down, "
+ "aborting hibernation\n");
+ goto Enable_irqs;
+ }
+
+ if (hibernation_test(TEST_CORE) || pm_wakeup_pending())
+ goto Power_up;
+
+ in_suspend = 1;
+ save_processor_state();
+ trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, true);
+ error = swsusp_arch_suspend();
+ trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, false);
+ if (error)
+ printk(KERN_ERR "PM: Error %d creating hibernation image\n",
+ error);
+ /* Restore control flow magically appears here */
+ restore_processor_state();
+ if (!in_suspend)
+ events_check_enabled = false;
+
+ platform_leave(platform_mode);
+
+ Power_up:
+ syscore_resume();
+
+ Enable_irqs:
+ local_irq_enable();
+
+ Enable_cpus:
+ enable_nonboot_cpus();
+
+ Platform_finish:
+ platform_finish(platform_mode);
+
+ dpm_resume_start(in_suspend ?
+ (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
+
+ return error;
+}
+
+/**
+ * hibernation_snapshot - Quiesce devices and create a hibernation image.
+ * @platform_mode: If set, use platform driver to prepare for the transition.
+ *
+ * This routine must be called with pm_mutex held.
+ */
+int hibernation_snapshot(int platform_mode)
+{
+ pm_message_t msg;
+ int error;
+
+ error = platform_begin(platform_mode);
+ if (error)
+ goto Close;
+
+ /* Preallocate image memory before shutting down devices. */
+ error = hibernate_preallocate_memory();
+ if (error)
+ goto Close;
+
+ error = freeze_kernel_threads();
+ if (error)
+ goto Cleanup;
+
+ if (hibernation_test(TEST_FREEZER)) {
+
+ /*
+ * Indicate to the caller that we are returning due to a
+ * successful freezer test.
+ */
+ freezer_test_done = true;
+ goto Thaw;
+ }
+
+ error = dpm_prepare(PMSG_FREEZE);
+ if (error) {
+ dpm_complete(PMSG_RECOVER);
+ goto Thaw;
+ }
+
+ suspend_console();
+ pm_restrict_gfp_mask();
+
+ error = dpm_suspend(PMSG_FREEZE);
+
+ if (error || hibernation_test(TEST_DEVICES))
+ platform_recover(platform_mode);
+ else
+ error = create_image(platform_mode);
+
+ /*
+ * In the case that we call create_image() above, the control
+ * returns here (1) after the image has been created or the
+ * image creation has failed and (2) after a successful restore.
+ */
+
+ /* We may need to release the preallocated image pages here. */
+ if (error || !in_suspend)
+ swsusp_free();
+
+ msg = in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE;
+ dpm_resume(msg);
+
+ if (error || !in_suspend)
+ pm_restore_gfp_mask();
+
+ resume_console();
+ dpm_complete(msg);
+
+ Close:
+ platform_end(platform_mode);
+ return error;
+
+ Thaw:
+ thaw_kernel_threads();
+ Cleanup:
+ swsusp_free();
+ goto Close;
+}
+
+/**
+ * resume_target_kernel - Restore system state from a hibernation image.
+ * @platform_mode: Whether or not to use the platform driver.
+ *
+ * Execute device drivers' "noirq" and "late" freeze callbacks, restore the
+ * contents of highmem that have not been restored yet from the image and run
+ * the low-level code that will restore the remaining contents of memory and
+ * switch to the just restored target kernel.
+ */
+static int resume_target_kernel(bool platform_mode)
+{
+ int error;
+
+ error = dpm_suspend_end(PMSG_QUIESCE);
+ if (error) {
+ printk(KERN_ERR "PM: Some devices failed to power down, "
+ "aborting resume\n");
+ return error;
+ }
+
+ error = platform_pre_restore(platform_mode);
+ if (error)
+ goto Cleanup;
+
+ error = disable_nonboot_cpus();
+ if (error)
+ goto Enable_cpus;
+
+ local_irq_disable();
+
+ error = syscore_suspend();
+ if (error)
+ goto Enable_irqs;
+
+ save_processor_state();
+ error = restore_highmem();
+ if (!error) {
+ error = swsusp_arch_resume();
+ /*
+ * The code below is only ever reached in case of a failure.
+ * Otherwise, execution continues at the place where
+ * swsusp_arch_suspend() was called.
+ */
+ BUG_ON(!error);
+ /*
+ * This call to restore_highmem() reverts the changes made by
+ * the previous one.
+ */
+ restore_highmem();
+ }
+ /*
+ * The only reason why swsusp_arch_resume() can fail is memory being
+ * very tight, so we have to free it as soon as we can to avoid
+ * subsequent failures.
+ */
+ swsusp_free();
+ restore_processor_state();
+ touch_softlockup_watchdog();
+
+ syscore_resume();
+
+ Enable_irqs:
+ local_irq_enable();
+
+ Enable_cpus:
+ enable_nonboot_cpus();
+
+ Cleanup:
+ platform_restore_cleanup(platform_mode);
+
+ dpm_resume_start(PMSG_RECOVER);
+
+ return error;
+}
+
+/**
+ * hibernation_restore - Quiesce devices and restore from a hibernation image.
+ * @platform_mode: If set, use platform driver to prepare for the transition.
+ *
+ * This routine must be called with pm_mutex held. If it is successful, control
+ * reappears in the restored target kernel in hibernation_snapshot().
+ */
+int hibernation_restore(int platform_mode)
+{
+ int error;
+
+ pm_prepare_console();
+ suspend_console();
+ pm_restrict_gfp_mask();
+ error = dpm_suspend_start(PMSG_QUIESCE);
+ if (!error) {
+ error = resume_target_kernel(platform_mode);
+ /*
+ * The above should either succeed and jump to the new kernel,
+ * or return with an error. Otherwise things are just
+ * undefined, so let's be paranoid.
+ */
+ BUG_ON(!error);
+ }
+ dpm_resume_end(PMSG_RECOVER);
+ pm_restore_gfp_mask();
+ resume_console();
+ pm_restore_console();
+ return error;
+}
+
+/**
+ * hibernation_platform_enter - Power off the system using the platform driver.
+ */
+int hibernation_platform_enter(void)
+{
+ int error;
+
+ if (!hibernation_ops)
+ return -ENOSYS;
+
+ /*
+ * We have cancelled the power transition by running
+ * hibernation_ops->finish() before saving the image, so we should let
+ * the firmware know that we're going to enter the sleep state after all
+ */
+ error = hibernation_ops->begin();
+ if (error)
+ goto Close;
+
+ entering_platform_hibernation = true;
+ suspend_console();
+ error = dpm_suspend_start(PMSG_HIBERNATE);
+ if (error) {
+ if (hibernation_ops->recover)
+ hibernation_ops->recover();
+ goto Resume_devices;
+ }
+
+ error = dpm_suspend_end(PMSG_HIBERNATE);
+ if (error)
+ goto Resume_devices;
+
+ error = hibernation_ops->prepare();
+ if (error)
+ goto Platform_finish;
+
+ error = disable_nonboot_cpus();
+ if (error)
+ goto Platform_finish;
+
+ local_irq_disable();
+ syscore_suspend();
+ if (pm_wakeup_pending()) {
+ error = -EAGAIN;
+ goto Power_up;
+ }
+
+ hibernation_ops->enter();
+ /* We should never get here */
+ while (1);
+
+ Power_up:
+ syscore_resume();
+ local_irq_enable();
+ enable_nonboot_cpus();
+
+ Platform_finish:
+ hibernation_ops->finish();
+
+ dpm_resume_start(PMSG_RESTORE);
+
+ Resume_devices:
+ entering_platform_hibernation = false;
+ dpm_resume_end(PMSG_RESTORE);
+ resume_console();
+
+ Close:
+ hibernation_ops->end();
+
+ return error;
+}
+
+/**
+ * power_down - Shut the machine down for hibernation.
+ *
+ * Use the platform driver, if configured, to put the system into the sleep
+ * state corresponding to hibernation, or try to power it off or reboot,
+ * depending on the value of hibernation_mode.
+ */
+static void power_down(void)
+{
+#ifdef CONFIG_SUSPEND
+ int error;
+#endif
+
+ switch (hibernation_mode) {
+ case HIBERNATION_REBOOT:
+ kernel_restart(NULL);
+ break;
+ case HIBERNATION_PLATFORM:
+ hibernation_platform_enter();
+ case HIBERNATION_SHUTDOWN:
+ if (pm_power_off)
+ kernel_power_off();
+ break;
+#ifdef CONFIG_SUSPEND
+ case HIBERNATION_SUSPEND:
+ error = suspend_devices_and_enter(PM_SUSPEND_MEM);
+ if (error) {
+ if (hibernation_ops)
+ hibernation_mode = HIBERNATION_PLATFORM;
+ else
+ hibernation_mode = HIBERNATION_SHUTDOWN;
+ power_down();
+ }
+ /*
+ * Restore swap signature.
+ */
+ error = swsusp_unmark();
+ if (error)
+ printk(KERN_ERR "PM: Swap will be unusable! "
+ "Try swapon -a.\n");
+ return;
+#endif
+ }
+ kernel_halt();
+ /*
+ * Valid image is on the disk, if we continue we risk serious data
+ * corruption after resume.
+ */
+ printk(KERN_CRIT "PM: Please power down manually\n");
+ while (1)
+ cpu_relax();
+}
+
+/**
+ * hibernate - Carry out system hibernation, including saving the image.
+ */
+int hibernate(void)
+{
+ int error;
+
+ if (test_action_state(TOI_REPLACE_SWSUSP))
+ return try_tuxonice_hibernate();
+
+ if (!hibernation_available()) {
+ pr_debug("PM: Hibernation not available.\n");
+ return -EPERM;
+ }
+
+ lock_system_sleep();
+ /* The snapshot device should not be opened while we're running */
+ if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
+ error = -EBUSY;
+ goto Unlock;
+ }
+
+ pm_prepare_console();
+ error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
+ if (error)
+ goto Exit;
+
+ printk(KERN_INFO "PM: Syncing filesystems ... ");
+ sys_sync();
+ printk("done.\n");
+
+ error = freeze_processes();
+ if (error)
+ goto Exit;
+
+ lock_device_hotplug();
+ /* Allocate memory management structures */
+ error = create_basic_memory_bitmaps();
+ if (error)
+ goto Thaw;
+
+ error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM);
+ if (error || freezer_test_done)
+ goto Free_bitmaps;
+
+ if (in_suspend) {
+ unsigned int flags = 0;
+
+ if (hibernation_mode == HIBERNATION_PLATFORM)
+ flags |= SF_PLATFORM_MODE;
+ if (nocompress)
+ flags |= SF_NOCOMPRESS_MODE;
+ else
+ flags |= SF_CRC32_MODE;
+
+ pr_debug("PM: writing image.\n");
+ error = swsusp_write(flags);
+ swsusp_free();
+ if (!error)
+ power_down();
+ in_suspend = 0;
+ pm_restore_gfp_mask();
+ } else {
+ pr_debug("PM: Image restored successfully.\n");
+ }
+
+ Free_bitmaps:
+ free_basic_memory_bitmaps();
+ Thaw:
+ unlock_device_hotplug();
+ thaw_processes();
+
+ /* Don't bother checking whether freezer_test_done is true */
+ freezer_test_done = false;
+ Exit:
+ pm_notifier_call_chain(PM_POST_HIBERNATION);
+ pm_restore_console();
+ atomic_inc(&snapshot_device_available);
+ Unlock:
+ unlock_system_sleep();
+ return error;
+}
+
+
+/**
+ * software_resume - Resume from a saved hibernation image.
+ *
+ * This routine is called as a late initcall, when all devices have been
+ * discovered and initialized already.
+ *
+ * The image reading code is called to see if there is a hibernation image
+ * available for reading. If that is the case, devices are quiesced and the
+ * contents of memory is restored from the saved image.
+ *
+ * If this is successful, control reappears in the restored target kernel in
+ * hibernation_snapshot() which returns to hibernate(). Otherwise, the routine
+ * attempts to recover gracefully and make the kernel return to the normal mode
+ * of operation.
+ */
+int software_resume(void)
+{
+ int error;
+ unsigned int flags;
+
+ resume_attempted = 1;
+
+ /*
+ * We can't know (until an image header - if any - is loaded), whether
+ * we did override swsusp. We therefore ensure that both are tried.
+ */
+ try_tuxonice_resume();
+
+ /*
+ * If the user said "noresume".. bail out early.
+ */
+ if (noresume || !hibernation_available())
+ return 0;
+
+ /*
+ * name_to_dev_t() below takes a sysfs buffer mutex when sysfs
+ * is configured into the kernel. Since the regular hibernate
+ * trigger path is via sysfs which takes a buffer mutex before
+ * calling hibernate functions (which take pm_mutex) this can
+ * cause lockdep to complain about a possible ABBA deadlock
+ * which cannot happen since we're in the boot code here and
+ * sysfs can't be invoked yet. Therefore, we use a subclass
+ * here to avoid lockdep complaining.
+ */
+ mutex_lock_nested(&pm_mutex, SINGLE_DEPTH_NESTING);
+
+ if (swsusp_resume_device)
+ goto Check_image;
+
+ if (!strlen(resume_file)) {
+ error = -ENOENT;
+ goto Unlock;
+ }
+
+ pr_debug("PM: Checking hibernation image partition %s\n", resume_file);
+
+ if (resume_delay) {
+ printk(KERN_INFO "Waiting %dsec before reading resume device...\n",
+ resume_delay);
+ ssleep(resume_delay);
+ }
+
+ /* Check if the device is there */
+ swsusp_resume_device = name_to_dev_t(resume_file);
+
+ /*
+ * name_to_dev_t() cannot verify the partition if resume_file is in
+ * integer format (e.g. major:minor).
+ */
+ if (isdigit(resume_file[0]) && resume_wait) {
+ int partno;
+ while (!get_gendisk(swsusp_resume_device, &partno))
+ msleep(10);
+ }
+
+ if (!swsusp_resume_device) {
+ /*
+ * Some device discovery might still be in progress; we need
+ * to wait for this to finish.
+ */
+ wait_for_device_probe();
+
+ if (resume_wait) {
+ while ((swsusp_resume_device = name_to_dev_t(resume_file)) == 0)
+ msleep(10);
+ async_synchronize_full();
+ }
+
+ swsusp_resume_device = name_to_dev_t(resume_file);
+ if (!swsusp_resume_device) {
+ error = -ENODEV;
+ goto Unlock;
+ }
+ }
+
+ Check_image:
+ pr_debug("PM: Hibernation image partition %d:%d present\n",
+ MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device));
+
+ pr_debug("PM: Looking for hibernation image.\n");
+ error = swsusp_check();
+ if (error)
+ goto Unlock;
+
+ /* The snapshot device should not be opened while we're running */
+ if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
+ error = -EBUSY;
+ swsusp_close(FMODE_READ);
+ goto Unlock;
+ }
+
+ pm_prepare_console();
+ error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
+ if (error)
+ goto Close_Finish;
+
+ pr_debug("PM: Preparing processes for restore.\n");
+ error = freeze_processes();
+ if (error)
+ goto Close_Finish;
+
+ pr_debug("PM: Loading hibernation image.\n");
+
+ lock_device_hotplug();
+ error = create_basic_memory_bitmaps();
+ if (error)
+ goto Thaw;
+
+ error = swsusp_read(&flags);
+ swsusp_close(FMODE_READ);
+ if (!error)
+ hibernation_restore(flags & SF_PLATFORM_MODE);
+
+ printk(KERN_ERR "PM: Failed to load hibernation image, recovering.\n");
+ swsusp_free();
+ free_basic_memory_bitmaps();
+ Thaw:
+ unlock_device_hotplug();
+ thaw_processes();
+ Finish:
+ pm_notifier_call_chain(PM_POST_RESTORE);
+ pm_restore_console();
+ atomic_inc(&snapshot_device_available);
+ /* For success case, the suspend path will release the lock */
+ Unlock:
+ mutex_unlock(&pm_mutex);
+ pr_debug("PM: Hibernation image not present or could not be loaded.\n");
+ return error;
+ Close_Finish:
+ swsusp_close(FMODE_READ);
+ goto Finish;
+}
+
+late_initcall_sync(software_resume);
+
+
+static const char * const hibernation_modes[] = {
+ [HIBERNATION_PLATFORM] = "platform",
+ [HIBERNATION_SHUTDOWN] = "shutdown",
+ [HIBERNATION_REBOOT] = "reboot",
+#ifdef CONFIG_SUSPEND
+ [HIBERNATION_SUSPEND] = "suspend",
+#endif
+};
+
+/*
+ * /sys/power/disk - Control hibernation mode.
+ *
+ * Hibernation can be handled in several ways. There are a few different ways
+ * to put the system into the sleep state: using the platform driver (e.g. ACPI
+ * or other hibernation_ops), powering it off or rebooting it (for testing
+ * mostly).
+ *
+ * The sysfs file /sys/power/disk provides an interface for selecting the
+ * hibernation mode to use. Reading from this file causes the available modes
+ * to be printed. The following modes can be supported:
+ *
+ * 'platform'
+ * 'shutdown'
+ * 'reboot'
+ * 'suspend' (only if CONFIG_SUSPEND is set)
+ *
+ * If a platform hibernation driver is in use, 'platform' will be supported
+ * and will be used by default. Otherwise, 'shutdown' will be used by default.
+ * The selected option (i.e. the one corresponding to the current value of
+ * hibernation_mode) is enclosed in square brackets.
+ *
+ * To select a given hibernation mode it is necessary to write the mode's
+ * string representation (as returned by reading from /sys/power/disk) back
+ * into /sys/power/disk.
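+ *
+ * For example, from a shell (illustrative usage; the exact set of modes
+ * offered depends on the kernel configuration and platform):
+ *
+ *	# cat /sys/power/disk
+ *	[platform] shutdown reboot
+ *	# echo reboot > /sys/power/disk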
+ */
+
+static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ int i;
+ char *start = buf;
+
+ if (!hibernation_available())
+ return sprintf(buf, "[disabled]\n");
+
+ for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) {
+ if (!hibernation_modes[i])
+ continue;
+ switch (i) {
+ case HIBERNATION_SHUTDOWN:
+ case HIBERNATION_REBOOT:
+#ifdef CONFIG_SUSPEND
+ case HIBERNATION_SUSPEND:
+#endif
+ break;
+ case HIBERNATION_PLATFORM:
+ if (hibernation_ops)
+ break;
+ /* not a valid mode, continue with loop */
+ continue;
+ }
+ if (i == hibernation_mode)
+ buf += sprintf(buf, "[%s] ", hibernation_modes[i]);
+ else
+ buf += sprintf(buf, "%s ", hibernation_modes[i]);
+ }
+ buf += sprintf(buf, "\n");
+ return buf-start;
+}
+
+static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t n)
+{
+ int error = 0;
+ int i;
+ int len;
+ char *p;
+ int mode = HIBERNATION_INVALID;
+
+ if (!hibernation_available())
+ return -EPERM;
+
+ p = memchr(buf, '\n', n);
+ len = p ? p - buf : n;
+
+ lock_system_sleep();
+ for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) {
+ if (len == strlen(hibernation_modes[i])
+ && !strncmp(buf, hibernation_modes[i], len)) {
+ mode = i;
+ break;
+ }
+ }
+ if (mode != HIBERNATION_INVALID) {
+ switch (mode) {
+ case HIBERNATION_SHUTDOWN:
+ case HIBERNATION_REBOOT:
+#ifdef CONFIG_SUSPEND
+ case HIBERNATION_SUSPEND:
+#endif
+ hibernation_mode = mode;
+ break;
+ case HIBERNATION_PLATFORM:
+ if (hibernation_ops)
+ hibernation_mode = mode;
+ else
+ error = -EINVAL;
+ }
+ } else
+ error = -EINVAL;
+
+ if (!error)
+ pr_debug("PM: Hibernation mode set to '%s'\n",
+ hibernation_modes[mode]);
+ unlock_system_sleep();
+ return error ? error : n;
+}
+
+power_attr(disk);
+
+static ssize_t resume_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf,"%d:%d\n", MAJOR(swsusp_resume_device),
+ MINOR(swsusp_resume_device));
+}
+
+static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t n)
+{
+ dev_t res;
+ int len = n;
+ char *name;
+
+ if (len && buf[len-1] == '\n')
+ len--;
+ name = kstrndup(buf, len, GFP_KERNEL);
+ if (!name)
+ return -ENOMEM;
+
+ res = name_to_dev_t(name);
+ kfree(name);
+ if (!res)
+ return -EINVAL;
+
+ lock_system_sleep();
+ swsusp_resume_device = res;
+ unlock_system_sleep();
+ printk(KERN_INFO "PM: Starting manual resume from disk\n");
+ noresume = 0;
+ software_resume();
+ return n;
+}
+
+power_attr(resume);
+
+static ssize_t image_size_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%lu\n", image_size);
+}
+
+static ssize_t image_size_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t n)
+{
+ unsigned long size;
+
+ if (sscanf(buf, "%lu", &size) == 1) {
+ image_size = size;
+ return n;
+ }
+
+ return -EINVAL;
+}
+
+power_attr(image_size);
+
+static ssize_t reserved_size_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%lu\n", reserved_size);
+}
+
+static ssize_t reserved_size_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t n)
+{
+ unsigned long size;
+
+ if (sscanf(buf, "%lu", &size) == 1) {
+ reserved_size = size;
+ return n;
+ }
+
+ return -EINVAL;
+}
+
+power_attr(reserved_size);
+
+static struct attribute * g[] = {
+ &disk_attr.attr,
+ &resume_attr.attr,
+ &image_size_attr.attr,
+ &reserved_size_attr.attr,
+ NULL,
+};
+
+
+static struct attribute_group attr_group = {
+ .attrs = g,
+};
+
+
+static int __init pm_disk_init(void)
+{
+ return sysfs_create_group(power_kobj, &attr_group);
+}
+
+core_initcall(pm_disk_init);
+
+
+static int __init resume_setup(char *str)
+{
+ if (noresume)
+ return 1;
+
+	strncpy(resume_file, str, 255);
+ return 1;
+}
+
+static int __init resume_offset_setup(char *str)
+{
+ unsigned long long offset;
+
+ if (noresume)
+ return 1;
+
+ if (sscanf(str, "%llu", &offset) == 1)
+ swsusp_resume_block = offset;
+
+ return 1;
+}
+
+static int __init hibernate_setup(char *str)
+{
+ if (!strncmp(str, "noresume", 8))
+ noresume = 1;
+ else if (!strncmp(str, "nocompress", 10))
+ nocompress = 1;
+ else if (!strncmp(str, "no", 2)) {
+ noresume = 1;
+ nohibernate = 1;
+ }
+ return 1;
+}
+
+static int __init noresume_setup(char *str)
+{
+ noresume = 1;
+ set_toi_state(TOI_NORESUME_SPECIFIED);
+ return 1;
+}
+
+static int __init resumewait_setup(char *str)
+{
+ resume_wait = 1;
+ return 1;
+}
+
+static int __init resumedelay_setup(char *str)
+{
+ int rc = kstrtouint(str, 0, &resume_delay);
+
+ if (rc)
+ return rc;
+ return 1;
+}
+
+static int __init nohibernate_setup(char *str)
+{
+ noresume = 1;
+ nohibernate = 1;
+ return 1;
+}
+
+static int __init kaslr_nohibernate_setup(char *str)
+{
+ return nohibernate_setup(str);
+}
+
+__setup("noresume", noresume_setup);
+__setup("resume_offset=", resume_offset_setup);
+__setup("resume=", resume_setup);
+__setup("hibernate=", hibernate_setup);
+__setup("resumewait", resumewait_setup);
+__setup("resumedelay=", resumedelay_setup);
+__setup("nohibernate", nohibernate_setup);
+__setup("kaslr", kaslr_nohibernate_setup);
diff --git a/kernel/power/main.c b/kernel/power/main.c
new file mode 100644
index 000000000..86e8157a4
--- /dev/null
+++ b/kernel/power/main.c
@@ -0,0 +1,646 @@
+/*
+ * kernel/power/main.c - PM subsystem core functionality.
+ *
+ * Copyright (c) 2003 Patrick Mochel
+ * Copyright (c) 2003 Open Source Development Lab
+ *
+ * This file is released under the GPLv2
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/kobject.h>
+#include <linux/string.h>
+#include <linux/pm-trace.h>
+#include <linux/workqueue.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "power.h"
+
+DEFINE_MUTEX(pm_mutex);
+
+#ifdef CONFIG_PM_SLEEP
+
+/* Routines for PM-transition notifications */
+
+static BLOCKING_NOTIFIER_HEAD(pm_chain_head);
+
+int register_pm_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_register(&pm_chain_head, nb);
+}
+EXPORT_SYMBOL_GPL(register_pm_notifier);
+
+int unregister_pm_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_unregister(&pm_chain_head, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_pm_notifier);
+
+int pm_notifier_call_chain(unsigned long val)
+{
+ int ret = blocking_notifier_call_chain(&pm_chain_head, val, NULL);
+
+ return notifier_to_errno(ret);
+}
+
+/* If set, devices may be suspended and resumed asynchronously. */
+int pm_async_enabled = 1;
+
+static ssize_t pm_async_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", pm_async_enabled);
+}
+
+static ssize_t pm_async_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t n)
+{
+ unsigned long val;
+
+ if (kstrtoul(buf, 10, &val))
+ return -EINVAL;
+
+ if (val > 1)
+ return -EINVAL;
+
+ pm_async_enabled = val;
+ return n;
+}
+
+power_attr(pm_async);
+
+#ifdef CONFIG_PM_DEBUG
+int pm_test_level = TEST_NONE;
+
+static const char * const pm_tests[__TEST_AFTER_LAST] = {
+ [TEST_NONE] = "none",
+ [TEST_CORE] = "core",
+ [TEST_CPUS] = "processors",
+ [TEST_PLATFORM] = "platform",
+ [TEST_DEVICES] = "devices",
+ [TEST_FREEZER] = "freezer",
+};
+
+static ssize_t pm_test_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ char *s = buf;
+ int level;
+
+ for (level = TEST_FIRST; level <= TEST_MAX; level++)
+ if (pm_tests[level]) {
+ if (level == pm_test_level)
+ s += sprintf(s, "[%s] ", pm_tests[level]);
+ else
+ s += sprintf(s, "%s ", pm_tests[level]);
+ }
+
+ if (s != buf)
+ /* convert the last space to a newline */
+ *(s-1) = '\n';
+
+ return (s - buf);
+}
+
+static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t n)
+{
+ const char * const *s;
+ int level;
+ char *p;
+ int len;
+ int error = -EINVAL;
+
+ p = memchr(buf, '\n', n);
+ len = p ? p - buf : n;
+
+ lock_system_sleep();
+
+ level = TEST_FIRST;
+ for (s = &pm_tests[level]; level <= TEST_MAX; s++, level++)
+ if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) {
+ pm_test_level = level;
+ error = 0;
+ break;
+ }
+
+ unlock_system_sleep();
+
+ return error ? error : n;
+}
+
+power_attr(pm_test);
+#endif /* CONFIG_PM_DEBUG */
+
+#ifdef CONFIG_DEBUG_FS
+static char *suspend_step_name(enum suspend_stat_step step)
+{
+ switch (step) {
+ case SUSPEND_FREEZE:
+ return "freeze";
+ case SUSPEND_PREPARE:
+ return "prepare";
+ case SUSPEND_SUSPEND:
+ return "suspend";
+ case SUSPEND_SUSPEND_NOIRQ:
+ return "suspend_noirq";
+ case SUSPEND_RESUME_NOIRQ:
+ return "resume_noirq";
+ case SUSPEND_RESUME:
+ return "resume";
+ default:
+ return "";
+ }
+}
+
+static int suspend_stats_show(struct seq_file *s, void *unused)
+{
+ int i, index, last_dev, last_errno, last_step;
+
+ last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1;
+ last_dev %= REC_FAILED_NUM;
+ last_errno = suspend_stats.last_failed_errno + REC_FAILED_NUM - 1;
+ last_errno %= REC_FAILED_NUM;
+ last_step = suspend_stats.last_failed_step + REC_FAILED_NUM - 1;
+ last_step %= REC_FAILED_NUM;
+ seq_printf(s, "%s: %d\n%s: %d\n%s: %d\n%s: %d\n%s: %d\n"
+ "%s: %d\n%s: %d\n%s: %d\n%s: %d\n%s: %d\n",
+ "success", suspend_stats.success,
+ "fail", suspend_stats.fail,
+ "failed_freeze", suspend_stats.failed_freeze,
+ "failed_prepare", suspend_stats.failed_prepare,
+ "failed_suspend", suspend_stats.failed_suspend,
+ "failed_suspend_late",
+ suspend_stats.failed_suspend_late,
+ "failed_suspend_noirq",
+ suspend_stats.failed_suspend_noirq,
+ "failed_resume", suspend_stats.failed_resume,
+ "failed_resume_early",
+ suspend_stats.failed_resume_early,
+ "failed_resume_noirq",
+ suspend_stats.failed_resume_noirq);
+ seq_printf(s, "failures:\n last_failed_dev:\t%-s\n",
+ suspend_stats.failed_devs[last_dev]);
+ for (i = 1; i < REC_FAILED_NUM; i++) {
+ index = last_dev + REC_FAILED_NUM - i;
+ index %= REC_FAILED_NUM;
+ seq_printf(s, "\t\t\t%-s\n",
+ suspend_stats.failed_devs[index]);
+ }
+ seq_printf(s, " last_failed_errno:\t%-d\n",
+ suspend_stats.errno[last_errno]);
+ for (i = 1; i < REC_FAILED_NUM; i++) {
+ index = last_errno + REC_FAILED_NUM - i;
+ index %= REC_FAILED_NUM;
+ seq_printf(s, "\t\t\t%-d\n",
+ suspend_stats.errno[index]);
+ }
+ seq_printf(s, " last_failed_step:\t%-s\n",
+ suspend_step_name(
+ suspend_stats.failed_steps[last_step]));
+ for (i = 1; i < REC_FAILED_NUM; i++) {
+ index = last_step + REC_FAILED_NUM - i;
+ index %= REC_FAILED_NUM;
+ seq_printf(s, "\t\t\t%-s\n",
+ suspend_step_name(
+ suspend_stats.failed_steps[index]));
+ }
+
+ return 0;
+}
+
+static int suspend_stats_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, suspend_stats_show, NULL);
+}
+
+static const struct file_operations suspend_stats_operations = {
+ .open = suspend_stats_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int __init pm_debugfs_init(void)
+{
+ debugfs_create_file("suspend_stats", S_IFREG | S_IRUGO,
+ NULL, NULL, &suspend_stats_operations);
+ return 0;
+}
+
+late_initcall(pm_debugfs_init);
+#endif /* CONFIG_DEBUG_FS */
+
+#endif /* CONFIG_PM_SLEEP */
+
+#ifdef CONFIG_PM_SLEEP_DEBUG
+/*
+ * pm_print_times: print time taken by devices to suspend and resume.
+ *
+ * show() returns whether printing of suspend and resume times is enabled.
+ * store() accepts 0 or 1. 0 disables printing and 1 enables it.
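+ *
+ * For example, "echo 1 > /sys/power/pm_print_times" enables the reporting
+ * (illustrative shell usage).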
+ */
+bool pm_print_times_enabled;
+
+static ssize_t pm_print_times_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", pm_print_times_enabled);
+}
+
+static ssize_t pm_print_times_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t n)
+{
+ unsigned long val;
+
+ if (kstrtoul(buf, 10, &val))
+ return -EINVAL;
+
+ if (val > 1)
+ return -EINVAL;
+
+ pm_print_times_enabled = !!val;
+ return n;
+}
+
+power_attr(pm_print_times);
+
+static inline void pm_print_times_init(void)
+{
+ pm_print_times_enabled = !!initcall_debug;
+}
+#else /* !CONFIG_PM_SLEEP_DEBUG */
+static inline void pm_print_times_init(void) {}
+#endif /* CONFIG_PM_SLEEP_DEBUG */
+
+struct kobject *power_kobj;
+
+/**
+ * state - control system sleep states.
+ *
+ * show() returns available sleep state labels, which may be "mem", "standby",
+ * "freeze" and "disk" (hibernation). See Documentation/power/states.txt for a
+ * description of what they mean.
+ *
+ * store() accepts one of those strings, translates it into the proper
+ * enumerated value, and initiates a suspend transition.
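+ *
+ * For example, "echo mem > /sys/power/state" initiates suspend-to-RAM and
+ * "echo disk > /sys/power/state" starts hibernation (illustrative; the
+ * labels actually offered depend on the platform and configuration).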
+ */
+static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ char *s = buf;
+#ifdef CONFIG_SUSPEND
+ suspend_state_t i;
+
+ for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++)
+ if (pm_states[i])
+ s += sprintf(s,"%s ", pm_states[i]);
+
+#endif
+ if (hibernation_available())
+ s += sprintf(s, "disk ");
+ if (s != buf)
+ /* convert the last space to a newline */
+ *(s-1) = '\n';
+ return (s - buf);
+}
+
+static suspend_state_t decode_state(const char *buf, size_t n)
+{
+#ifdef CONFIG_SUSPEND
+ suspend_state_t state;
+#endif
+ char *p;
+ int len;
+
+ p = memchr(buf, '\n', n);
+ len = p ? p - buf : n;
+
+ /* Check hibernation first. */
+ if (len == 4 && !strncmp(buf, "disk", len))
+ return PM_SUSPEND_MAX;
+
+#ifdef CONFIG_SUSPEND
+ for (state = PM_SUSPEND_MIN; state < PM_SUSPEND_MAX; state++) {
+ const char *label = pm_states[state];
+
+ if (label && len == strlen(label) && !strncmp(buf, label, len))
+ return state;
+ }
+#endif
+
+ return PM_SUSPEND_ON;
+}
+
+static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t n)
+{
+ suspend_state_t state;
+ int error;
+
+ error = pm_autosleep_lock();
+ if (error)
+ return error;
+
+ if (pm_autosleep_state() > PM_SUSPEND_ON) {
+ error = -EBUSY;
+ goto out;
+ }
+
+ state = decode_state(buf, n);
+ if (state < PM_SUSPEND_MAX)
+ error = pm_suspend(state);
+ else if (state == PM_SUSPEND_MAX)
+ error = hibernate();
+ else
+ error = -EINVAL;
+
+ out:
+ pm_autosleep_unlock();
+ return error ? error : n;
+}
+
+power_attr(state);
+
+#ifdef CONFIG_PM_SLEEP
+/*
+ * The 'wakeup_count' attribute, along with the functions defined in
+ * drivers/base/power/wakeup.c, provides a means by which wakeup events can be
+ * handled in a non-racy way.
+ *
+ * If a wakeup event occurs when the system is in a sleep state, it simply is
+ * woken up. In turn, if an event that would wake the system up from a sleep
+ * state occurs when it is undergoing a transition to that sleep state, the
+ * transition should be aborted. Moreover, if such an event occurs when the
+ * system is in the working state, an attempt to start a transition to the
+ * given sleep state should fail during a certain period after the detection of
+ * the event. Using the 'state' attribute alone is not sufficient to satisfy
+ * these requirements, because a wakeup event may occur exactly when 'state'
+ * is being written to and may be delivered to user space right before it is
+ * frozen, so the event will remain only partially processed until the system is
+ * woken up by another event. In particular, it won't cause the transition to
+ * a sleep state to be aborted.
+ *
+ * This difficulty may be overcome if user space uses 'wakeup_count' before
+ * writing to 'state'. It first should read from 'wakeup_count' and store
+ * the read value. Then, after carrying out its own preparations for the system
+ * transition to a sleep state, it should write the stored value to
+ * 'wakeup_count'. If that fails, at least one wakeup event has occurred since
+ * 'wakeup_count' was read and 'state' should not be written to. Otherwise, it
+ * is allowed to write to 'state', but the transition will be aborted if there
+ * are any wakeup events detected after 'wakeup_count' was written to.
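+ *
+ * A minimal illustrative user space sequence (read_sysfs_u32(),
+ * write_sysfs() and write_sysfs_str() are hypothetical helpers; error
+ * handling omitted):
+ *
+ *	unsigned int cnt;
+ *
+ *	read_sysfs_u32("/sys/power/wakeup_count", &cnt);
+ *	do_suspend_preparations();
+ *	if (write_sysfs("/sys/power/wakeup_count", cnt) == 0)
+ *		write_sysfs_str("/sys/power/state", "mem");
+ *
+ * A failed write means at least one wakeup event was detected in the
+ * meantime, so the transition should not be started.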
+ */
+
+static ssize_t wakeup_count_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ unsigned int val;
+
+ return pm_get_wakeup_count(&val, true) ?
+ sprintf(buf, "%u\n", val) : -EINTR;
+}
+
+static ssize_t wakeup_count_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t n)
+{
+ unsigned int val;
+ int error;
+
+ error = pm_autosleep_lock();
+ if (error)
+ return error;
+
+ if (pm_autosleep_state() > PM_SUSPEND_ON) {
+ error = -EBUSY;
+ goto out;
+ }
+
+ error = -EINVAL;
+ if (sscanf(buf, "%u", &val) == 1) {
+ if (pm_save_wakeup_count(val))
+ error = n;
+ else
+ pm_print_active_wakeup_sources();
+ }
+
+ out:
+ pm_autosleep_unlock();
+ return error;
+}
+
+power_attr(wakeup_count);
+
+#ifdef CONFIG_PM_AUTOSLEEP
+static ssize_t autosleep_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ suspend_state_t state = pm_autosleep_state();
+
+ if (state == PM_SUSPEND_ON)
+ return sprintf(buf, "off\n");
+
+#ifdef CONFIG_SUSPEND
+ if (state < PM_SUSPEND_MAX)
+ return sprintf(buf, "%s\n", pm_states[state] ?
+ pm_states[state] : "error");
+#endif
+#ifdef CONFIG_HIBERNATION
+ return sprintf(buf, "disk\n");
+#else
+ return sprintf(buf, "error");
+#endif
+}
+
+static ssize_t autosleep_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t n)
+{
+ suspend_state_t state = decode_state(buf, n);
+ int error;
+
+ if (state == PM_SUSPEND_ON
+ && strcmp(buf, "off") && strcmp(buf, "off\n"))
+ return -EINVAL;
+
+ error = pm_autosleep_set_state(state);
+ return error ? error : n;
+}
+
+power_attr(autosleep);
+#endif /* CONFIG_PM_AUTOSLEEP */
+
+#ifdef CONFIG_PM_WAKELOCKS
+static ssize_t wake_lock_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ return pm_show_wakelocks(buf, true);
+}
+
+static ssize_t wake_lock_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t n)
+{
+ int error = pm_wake_lock(buf);
+ return error ? error : n;
+}
+
+power_attr(wake_lock);
+
+static ssize_t wake_unlock_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ return pm_show_wakelocks(buf, false);
+}
+
+static ssize_t wake_unlock_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t n)
+{
+ int error = pm_wake_unlock(buf);
+ return error ? error : n;
+}
+
+power_attr(wake_unlock);
+
+#endif /* CONFIG_PM_WAKELOCKS */
+#endif /* CONFIG_PM_SLEEP */
+
+#ifdef CONFIG_PM_TRACE
+int pm_trace_enabled;
+
+static ssize_t pm_trace_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", pm_trace_enabled);
+}
+
+static ssize_t
+pm_trace_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t n)
+{
+ int val;
+
+ if (sscanf(buf, "%d", &val) == 1) {
+ pm_trace_enabled = !!val;
+ if (pm_trace_enabled) {
+ pr_warn("PM: Enabling pm_trace changes system date and time during resume.\n"
+ "PM: Correct system time has to be restored manually after resume.\n");
+ }
+ return n;
+ }
+ return -EINVAL;
+}
+
+power_attr(pm_trace);
+
+static ssize_t pm_trace_dev_match_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ return show_trace_dev_match(buf, PAGE_SIZE);
+}
+
+static ssize_t
+pm_trace_dev_match_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t n)
+{
+ return -EINVAL;
+}
+
+power_attr(pm_trace_dev_match);
+
+#endif /* CONFIG_PM_TRACE */
+
+#ifdef CONFIG_FREEZER
+static ssize_t pm_freeze_timeout_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", freeze_timeout_msecs);
+}
+
+static ssize_t pm_freeze_timeout_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t n)
+{
+ unsigned long val;
+
+ if (kstrtoul(buf, 10, &val))
+ return -EINVAL;
+
+ freeze_timeout_msecs = val;
+ return n;
+}
+
+power_attr(pm_freeze_timeout);
+
+#endif /* CONFIG_FREEZER*/
+
+static struct attribute * g[] = {
+ &state_attr.attr,
+#ifdef CONFIG_PM_TRACE
+ &pm_trace_attr.attr,
+ &pm_trace_dev_match_attr.attr,
+#endif
+#ifdef CONFIG_PM_SLEEP
+ &pm_async_attr.attr,
+ &wakeup_count_attr.attr,
+#ifdef CONFIG_PM_AUTOSLEEP
+ &autosleep_attr.attr,
+#endif
+#ifdef CONFIG_PM_WAKELOCKS
+ &wake_lock_attr.attr,
+ &wake_unlock_attr.attr,
+#endif
+#ifdef CONFIG_PM_DEBUG
+ &pm_test_attr.attr,
+#endif
+#ifdef CONFIG_PM_SLEEP_DEBUG
+ &pm_print_times_attr.attr,
+#endif
+#endif
+#ifdef CONFIG_FREEZER
+ &pm_freeze_timeout_attr.attr,
+#endif
+ NULL,
+};
+
+static struct attribute_group attr_group = {
+ .attrs = g,
+};
+
+struct workqueue_struct *pm_wq;
+EXPORT_SYMBOL_GPL(pm_wq);
+
+static int __init pm_start_workqueue(void)
+{
+ pm_wq = alloc_workqueue("pm", WQ_FREEZABLE, 0);
+
+ return pm_wq ? 0 : -ENOMEM;
+}
+
+static int __init pm_init(void)
+{
+ int error = pm_start_workqueue();
+ if (error)
+ return error;
+ hibernate_image_size_init();
+ hibernate_reserved_size_init();
+ power_kobj = kobject_create_and_add("power", NULL);
+ if (!power_kobj)
+ return -ENOMEM;
+ error = sysfs_create_group(power_kobj, &attr_group);
+ if (error)
+ return error;
+ pm_print_times_init();
+ return pm_autosleep_init();
+}
+
+core_initcall(pm_init);
diff --git a/kernel/power/power.h b/kernel/power/power.h
new file mode 100644
index 000000000..095ed9f03
--- /dev/null
+++ b/kernel/power/power.h
@@ -0,0 +1,335 @@
+#include <linux/suspend.h>
+#include <linux/suspend_ioctls.h>
+#include <linux/utsname.h>
+#include <linux/freezer.h>
+#include <linux/compiler.h>
+
+struct swsusp_info {
+ struct new_utsname uts;
+ u32 version_code;
+ unsigned long num_physpages;
+ int cpus;
+ unsigned long image_pages;
+ unsigned long pages;
+ unsigned long size;
+} __aligned(PAGE_SIZE);
+
+#ifdef CONFIG_HIBERNATION
+/* kernel/power/snapshot.c */
+extern void __init hibernate_reserved_size_init(void);
+extern void __init hibernate_image_size_init(void);
+
+#ifdef CONFIG_ARCH_HIBERNATION_HEADER
+/* Maximum size of architecture specific data in a hibernation header */
+#define MAX_ARCH_HEADER_SIZE (sizeof(struct new_utsname) + 4)
+
+extern int arch_hibernation_header_save(void *addr, unsigned int max_size);
+extern int arch_hibernation_header_restore(void *addr);
+
+static inline int init_header_complete(struct swsusp_info *info)
+{
+ return arch_hibernation_header_save(info, MAX_ARCH_HEADER_SIZE);
+}
+
+static inline char *check_image_kernel(struct swsusp_info *info)
+{
+ return arch_hibernation_header_restore(info) ?
+ "architecture specific data" : NULL;
+}
+#else
+extern char *check_image_kernel(struct swsusp_info *info);
+#endif /* CONFIG_ARCH_HIBERNATION_HEADER */
+extern int init_header(struct swsusp_info *info);
+
+extern char resume_file[256];
+/*
+ * Keep some memory free so that I/O operations can succeed without paging
+ * [Might this be more than 4 MB?]
+ */
+#define PAGES_FOR_IO ((4096 * 1024) >> PAGE_SHIFT)
+
+/*
+ * Keep 1 MB of memory free so that device drivers can allocate some pages in
+ * their .suspend() routines without breaking the suspend to disk.
+ */
+#define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT)
+
+asmlinkage int swsusp_save(void);
+
+/* kernel/power/hibernate.c */
+extern bool freezer_test_done;
+
+extern int hibernation_snapshot(int platform_mode);
+extern int hibernation_restore(int platform_mode);
+extern int hibernation_platform_enter(void);
+
+#else /* !CONFIG_HIBERNATION */
+
+static inline void hibernate_reserved_size_init(void) {}
+static inline void hibernate_image_size_init(void) {}
+#endif /* !CONFIG_HIBERNATION */
+
+extern int pfn_is_nosave(unsigned long);
+
+#define power_attr(_name) \
+static struct kobj_attribute _name##_attr = { \
+ .attr = { \
+ .name = __stringify(_name), \
+ .mode = 0644, \
+ }, \
+ .show = _name##_show, \
+ .store = _name##_store, \
+}
+
+extern struct pbe *restore_pblist;
+
+/* Preferred image size in bytes (default 500 MB) */
+extern unsigned long image_size;
+/* Size of memory reserved for drivers (default SPARE_PAGES x PAGE_SIZE) */
+extern unsigned long reserved_size;
+extern int in_suspend;
+extern dev_t swsusp_resume_device;
+extern sector_t swsusp_resume_block;
+
+extern asmlinkage int swsusp_arch_suspend(void);
+extern asmlinkage int swsusp_arch_resume(void);
+
+extern int create_basic_memory_bitmaps(void);
+extern void free_basic_memory_bitmaps(void);
+extern int hibernate_preallocate_memory(void);
+
+/**
+ * Auxiliary structure used for reading the snapshot image data and
+ * metadata from and writing them to the list of page backup entries
+ * (PBEs) which is the main data structure of swsusp.
+ *
+ * Using struct snapshot_handle we can transfer the image, including its
+ * metadata, as a continuous sequence of bytes with the help of
+ * snapshot_read_next() and snapshot_write_next().
+ *
+ * The code that writes the image to storage or transfers it to
+ * user space is required to use snapshot_read_next() for this
+ * purpose and it should not make any assumptions regarding the internal
+ * structure of the image. Similarly, the code that reads the image from
+ * storage or transfers it from user space is required to use
+ * snapshot_write_next().
+ *
+ * This may allow us to change the internal structure of the image
+ * in the future with considerably less effort.
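+ *
+ * Illustrative sketch of an image writer built on this interface
+ * (write_page() is a hypothetical helper; error handling omitted):
+ *
+ *	struct snapshot_handle handle = {};
+ *	int ret;
+ *
+ *	while ((ret = snapshot_read_next(&handle)) > 0)
+ *		write_page(data_of(handle));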
+ */
+
+struct snapshot_handle {
+ unsigned int cur; /* number of the block of PAGE_SIZE bytes the
+ * next operation will refer to (ie. current)
+ */
+ void *buffer; /* address of the block to read from
+ * or write to
+ */
+ int sync_read; /* Set to one to notify the caller of
+ * snapshot_write_next() that it may
+ * need to call wait_on_bio_chain()
+ */
+};
+
+/* This macro returns the address from/to which the caller of
+ * snapshot_read_next()/snapshot_write_next() is allowed to
+ * read/write data after the function returns
+ */
+#define data_of(handle) ((handle).buffer)
+
+extern unsigned int snapshot_additional_pages(struct zone *zone);
+extern unsigned long snapshot_get_image_size(void);
+extern int snapshot_read_next(struct snapshot_handle *handle);
+extern int snapshot_write_next(struct snapshot_handle *handle);
+extern void snapshot_write_finalize(struct snapshot_handle *handle);
+extern int snapshot_image_loaded(struct snapshot_handle *handle);
+
+/* If unset, the snapshot device cannot be opened. */
+extern atomic_t snapshot_device_available;
+
+extern sector_t alloc_swapdev_block(int swap);
+extern void free_all_swap_pages(int swap);
+extern int swsusp_swap_in_use(void);
+
+/*
+ * Flags that can be passed from the hibernating kernel to the "boot" kernel in
+ * the image header.
+ */
+#define SF_PLATFORM_MODE 1
+#define SF_NOCOMPRESS_MODE 2
+#define SF_CRC32_MODE 4
+
+/* kernel/power/hibernate.c */
+extern int swsusp_check(void);
+extern void swsusp_free(void);
+extern int swsusp_read(unsigned int *flags_p);
+extern int swsusp_write(unsigned int flags);
+extern void swsusp_close(fmode_t);
+#ifdef CONFIG_SUSPEND
+extern int swsusp_unmark(void);
+#endif
+
+/* kernel/power/block_io.c */
+extern struct block_device *hib_resume_bdev;
+
+extern int hib_bio_read_page(pgoff_t page_off, void *addr,
+ struct bio **bio_chain);
+extern int hib_bio_write_page(pgoff_t page_off, void *addr,
+ struct bio **bio_chain);
+extern int hib_wait_on_bio_chain(struct bio **bio_chain);
+
+struct timeval;
+/* kernel/power/swsusp.c */
+extern void swsusp_show_speed(ktime_t, ktime_t, unsigned int, char *);
+
+#ifdef CONFIG_SUSPEND
+/* kernel/power/suspend.c */
+extern const char *pm_labels[];
+extern const char *pm_states[];
+
+extern int suspend_devices_and_enter(suspend_state_t state);
+#else /* !CONFIG_SUSPEND */
+static inline int suspend_devices_and_enter(suspend_state_t state)
+{
+ return -ENOSYS;
+}
+#endif /* !CONFIG_SUSPEND */
+
+#ifdef CONFIG_PM_TEST_SUSPEND
+/* kernel/power/suspend_test.c */
+extern void suspend_test_start(void);
+extern void suspend_test_finish(const char *label);
+#else /* !CONFIG_PM_TEST_SUSPEND */
+static inline void suspend_test_start(void) {}
+static inline void suspend_test_finish(const char *label) {}
+#endif /* !CONFIG_PM_TEST_SUSPEND */
+
+#ifdef CONFIG_PM_SLEEP
+/* kernel/power/main.c */
+extern int pm_notifier_call_chain(unsigned long val);
+#endif
+
+#ifdef CONFIG_HIGHMEM
+int restore_highmem(void);
+#else
+static inline unsigned int count_highmem_pages(void) { return 0; }
+static inline int restore_highmem(void) { return 0; }
+#endif
+
+/*
+ * Suspend test levels
+ */
+enum {
+ /* keep first */
+ TEST_NONE,
+ TEST_CORE,
+ TEST_CPUS,
+ TEST_PLATFORM,
+ TEST_DEVICES,
+ TEST_FREEZER,
+ /* keep last */
+ __TEST_AFTER_LAST
+};
+
+#define TEST_FIRST TEST_NONE
+#define TEST_MAX (__TEST_AFTER_LAST - 1)
+
+extern int pm_test_level;
+
+#ifdef CONFIG_SUSPEND_FREEZER
+static inline int suspend_freeze_processes(void)
+{
+ int error;
+
+ error = freeze_processes();
+ /*
+ * freeze_processes() automatically thaws every task if freezing
+ * fails. So we need not do anything extra upon error.
+ */
+ if (error)
+ return error;
+
+ error = freeze_kernel_threads();
+ /*
+ * freeze_kernel_threads() thaws only kernel threads upon freezing
+ * failure. So we have to thaw the userspace tasks ourselves.
+ */
+ if (error)
+ thaw_processes();
+
+ return error;
+}
+
+static inline void suspend_thaw_processes(void)
+{
+ thaw_processes();
+}
+#else
+static inline int suspend_freeze_processes(void)
+{
+ return 0;
+}
+
+static inline void suspend_thaw_processes(void)
+{
+}
+#endif
+
+extern struct page *saveable_page(struct zone *z, unsigned long p);
+#ifdef CONFIG_HIGHMEM
+struct page *saveable_highmem_page(struct zone *z, unsigned long p);
+#else
+static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
+{
+ return NULL;
+}
+#endif
+
+#define PBES_PER_PAGE (PAGE_SIZE / sizeof(struct pbe))
+extern struct list_head nosave_regions;
+
+/**
+ * This structure represents a range of page frames the contents of which
+ * should not be saved during the suspend.
+ */
+
+struct nosave_region {
+ struct list_head list;
+ unsigned long start_pfn;
+ unsigned long end_pfn;
+};
+
+#ifdef CONFIG_PM_AUTOSLEEP
+
+/* kernel/power/autosleep.c */
+extern int pm_autosleep_init(void);
+extern int pm_autosleep_lock(void);
+extern void pm_autosleep_unlock(void);
+extern suspend_state_t pm_autosleep_state(void);
+extern int pm_autosleep_set_state(suspend_state_t state);
+
+#else /* !CONFIG_PM_AUTOSLEEP */
+
+static inline int pm_autosleep_init(void) { return 0; }
+static inline int pm_autosleep_lock(void) { return 0; }
+static inline void pm_autosleep_unlock(void) {}
+static inline suspend_state_t pm_autosleep_state(void) { return PM_SUSPEND_ON; }
+
+#endif /* !CONFIG_PM_AUTOSLEEP */
+
+#ifdef CONFIG_PM_WAKELOCKS
+
+/* kernel/power/wakelock.c */
+extern ssize_t pm_show_wakelocks(char *buf, bool show_active);
+extern int pm_wake_lock(const char *buf);
+extern int pm_wake_unlock(const char *buf);
+
+#endif /* CONFIG_PM_WAKELOCKS */
+
+#ifdef CONFIG_TOI
+unsigned long toi_get_nonconflicting_page(void);
+#define BM_END_OF_MAP (~0UL)
+#else
+#define toi_get_nonconflicting_page() (0)
+#endif
diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c
new file mode 100644
index 000000000..7ef6866b5
--- /dev/null
+++ b/kernel/power/poweroff.c
@@ -0,0 +1,46 @@
+/*
+ * poweroff.c - sysrq handler to gracefully power down machine.
+ *
+ * This file is released under the GPL v2
+ */
+
+#include <linux/kernel.h>
+#include <linux/sysrq.h>
+#include <linux/init.h>
+#include <linux/pm.h>
+#include <linux/workqueue.h>
+#include <linux/reboot.h>
+#include <linux/cpumask.h>
+
+/*
+ * When the user hits Sys-Rq o to power down the machine, this is the
+ * callback we use.
+ */
+
+static void do_poweroff(struct work_struct *dummy)
+{
+ kernel_power_off();
+}
+
+static DECLARE_WORK(poweroff_work, do_poweroff);
+
+static void handle_poweroff(int key)
+{
+ /* run sysrq poweroff on boot cpu */
+ schedule_work_on(cpumask_first(cpu_online_mask), &poweroff_work);
+}
+
+static struct sysrq_key_op sysrq_poweroff_op = {
+ .handler = handle_poweroff,
+ .help_msg = "poweroff(o)",
+ .action_msg = "Power Off",
+ .enable_mask = SYSRQ_ENABLE_BOOT,
+};
+
+static int __init pm_sysrq_init(void)
+{
+ register_sysrq_key('o', &sysrq_poweroff_op);
+ return 0;
+}
+
+subsys_initcall(pm_sysrq_init);
diff --git a/kernel/power/process.c b/kernel/power/process.c
new file mode 100644
index 000000000..564f786df
--- /dev/null
+++ b/kernel/power/process.c
@@ -0,0 +1,237 @@
+/*
+ * kernel/power/process.c - Functions for starting/stopping processes on
+ * suspend transitions.
+ *
+ * Originally from swsusp.
+ */
+
+
+#undef DEBUG
+
+#include <linux/interrupt.h>
+#include <linux/oom.h>
+#include <linux/suspend.h>
+#include <linux/module.h>
+#include <linux/syscalls.h>
+#include <linux/freezer.h>
+#include <linux/delay.h>
+#include <linux/workqueue.h>
+#include <linux/kmod.h>
+#include <trace/events/power.h>
+
+/*
+ * Timeout for stopping processes
+ */
+unsigned int __read_mostly freeze_timeout_msecs = 20 * MSEC_PER_SEC;
+
+static int try_to_freeze_tasks(bool user_only)
+{
+ struct task_struct *g, *p;
+ unsigned long end_time;
+ unsigned int todo;
+ bool wq_busy = false;
+ struct timeval start, end;
+ u64 elapsed_msecs64;
+ unsigned int elapsed_msecs;
+ bool wakeup = false;
+ int sleep_usecs = USEC_PER_MSEC;
+
+ do_gettimeofday(&start);
+
+ end_time = jiffies + msecs_to_jiffies(freeze_timeout_msecs);
+
+ if (!user_only)
+ freeze_workqueues_begin();
+
+ while (true) {
+ todo = 0;
+ read_lock(&tasklist_lock);
+ for_each_process_thread(g, p) {
+ if (p == current || !freeze_task(p))
+ continue;
+
+ if (!freezer_should_skip(p))
+ todo++;
+ }
+ read_unlock(&tasklist_lock);
+
+ if (!user_only) {
+ wq_busy = freeze_workqueues_busy();
+ todo += wq_busy;
+ }
+
+ if (!todo || time_after(jiffies, end_time))
+ break;
+
+ if (pm_wakeup_pending()) {
+ wakeup = true;
+ break;
+ }
+
+ /*
+ * We need to retry, but first give the freezing tasks some
+ * time to enter the refrigerator. Start with an initial
+ * 1 ms sleep followed by exponential backoff until 8 ms.
+ */
+ usleep_range(sleep_usecs / 2, sleep_usecs);
+ if (sleep_usecs < 8 * USEC_PER_MSEC)
+ sleep_usecs *= 2;
+ }
+
+ do_gettimeofday(&end);
+ elapsed_msecs64 = timeval_to_ns(&end) - timeval_to_ns(&start);
+ do_div(elapsed_msecs64, NSEC_PER_MSEC);
+ elapsed_msecs = elapsed_msecs64;
+
+ if (todo) {
+ pr_cont("\n");
+ pr_err("Freezing of tasks %s after %d.%03d seconds "
+ "(%d tasks refusing to freeze, wq_busy=%d):\n",
+ wakeup ? "aborted" : "failed",
+ elapsed_msecs / 1000, elapsed_msecs % 1000,
+ todo - wq_busy, wq_busy);
+
+ if (!wakeup) {
+ read_lock(&tasklist_lock);
+ for_each_process_thread(g, p) {
+ if (p != current && !freezer_should_skip(p)
+ && freezing(p) && !frozen(p))
+ sched_show_task(p);
+ }
+ read_unlock(&tasklist_lock);
+ }
+ } else {
+ pr_cont("(elapsed %d.%03d seconds) ", elapsed_msecs / 1000,
+ elapsed_msecs % 1000);
+ }
+
+ return todo ? -EBUSY : 0;
+}
+
+/**
+ * freeze_processes - Signal user space processes to enter the refrigerator.
+ * The current thread will not be frozen. The same process that calls
+ * freeze_processes must later call thaw_processes.
+ *
+ * On success, returns 0. On failure, -errno and system is fully thawed.
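+ *
+ * Illustrative caller pattern (a sketch only; the real callers in this
+ * series add more error handling):
+ *
+ *	error = freeze_processes();
+ *	if (error)
+ *		return error;
+ *	... perform the sleep transition ...
+ *	thaw_processes();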
+ */
+int freeze_processes(void)
+{
+ int error;
+
+ error = __usermodehelper_disable(UMH_FREEZING);
+ if (error)
+ return error;
+
+ /* Make sure this task doesn't get frozen */
+ current->flags |= PF_SUSPEND_TASK;
+
+ if (!pm_freezing)
+ atomic_inc(&system_freezing_cnt);
+
+ pm_wakeup_clear();
+ pr_info("Freezing user space processes ... ");
+ pm_freezing = true;
+ error = try_to_freeze_tasks(true);
+ if (!error) {
+ __usermodehelper_set_disable_depth(UMH_DISABLED);
+ pr_cont("done.");
+ }
+ pr_cont("\n");
+ BUG_ON(in_atomic());
+
+ /*
+	 * Now that the whole userspace is frozen we need to disable
+ * the OOM killer to disallow any further interference with
+ * killable tasks.
+ */
+ if (!error && !oom_killer_disable())
+ error = -EBUSY;
+
+ if (error)
+ thaw_processes();
+ return error;
+}
+
+/**
+ * freeze_kernel_threads - Make freezable kernel threads go to the refrigerator.
+ *
+ * On success, returns 0. On failure, -errno and only the kernel threads are
+ * thawed, so as to give a chance to the caller to do additional cleanups
+ * (if any) before thawing the userspace tasks. So, it is the responsibility
+ * of the caller to thaw the userspace tasks, when the time is right.
+ */
+int freeze_kernel_threads(void)
+{
+ int error;
+
+ pr_info("Freezing remaining freezable tasks ... ");
+
+ pm_nosig_freezing = true;
+ error = try_to_freeze_tasks(false);
+ if (!error)
+ pr_cont("done.");
+
+ pr_cont("\n");
+ BUG_ON(in_atomic());
+
+ if (error)
+ thaw_kernel_threads();
+ return error;
+}
+
+void thaw_processes(void)
+{
+ struct task_struct *g, *p;
+ struct task_struct *curr = current;
+
+ trace_suspend_resume(TPS("thaw_processes"), 0, true);
+ if (pm_freezing)
+ atomic_dec(&system_freezing_cnt);
+ pm_freezing = false;
+ pm_nosig_freezing = false;
+
+ oom_killer_enable();
+
+ pr_info("Restarting tasks ... ");
+
+ __usermodehelper_set_disable_depth(UMH_FREEZING);
+ thaw_workqueues();
+
+ read_lock(&tasklist_lock);
+ for_each_process_thread(g, p) {
+ /* No other threads should have PF_SUSPEND_TASK set */
+ WARN_ON((p != curr) && (p->flags & PF_SUSPEND_TASK));
+ __thaw_task(p);
+ }
+ read_unlock(&tasklist_lock);
+
+ WARN_ON(!(curr->flags & PF_SUSPEND_TASK));
+ curr->flags &= ~PF_SUSPEND_TASK;
+
+ usermodehelper_enable();
+
+ schedule();
+ pr_cont("done.\n");
+ trace_suspend_resume(TPS("thaw_processes"), 0, false);
+}
+
+void thaw_kernel_threads(void)
+{
+ struct task_struct *g, *p;
+
+ pm_nosig_freezing = false;
+ pr_info("Restarting kernel threads ... ");
+
+ thaw_workqueues();
+
+ read_lock(&tasklist_lock);
+ for_each_process_thread(g, p) {
+ if (p->flags & (PF_KTHREAD | PF_WQ_WORKER))
+ __thaw_task(p);
+ }
+ read_unlock(&tasklist_lock);
+
+ schedule();
+ pr_cont("done.\n");
+}
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
new file mode 100644
index 000000000..97b0df713
--- /dev/null
+++ b/kernel/power/qos.c
@@ -0,0 +1,713 @@
+/*
+ * This module exposes the interface to kernel space for specifying
+ * QoS dependencies. It provides infrastructure for registration of:
+ *
+ * Dependents on a QoS value : register requests
+ * Watchers of QoS value : get notified when target QoS value changes
+ *
+ * This QoS design is best effort based. Dependents register their QoS needs.
+ * Watchers register to keep track of the current QoS needs of the system.
+ *
+ * There are 3 basic classes of QoS parameter: latency, timeout and
+ * throughput, each with defined units:
+ * latency: usec
+ * timeout: usec <-- currently not used.
+ * throughput: kbs (kilo byte / sec)
+ *
+ * There are lists of pm_qos_objects, each one wrapping requests and notifiers.
+ *
+ * User mode requests on a QoS parameter register themselves with the
+ * subsystem by opening the device node /dev/... and writing their request to
+ * the node. As long as the process holds a file handle open to the node, the
+ * client continues to be accounted for. Upon file release the usermode
+ * request is removed and a new qos target is computed. This way, when the
+ * process closes the file descriptor or exits, its request is cleaned up and
+ * the pm_qos_object gets an opportunity to recompute the target.
+ *
+ * Mark Gross <mgross@linux.intel.com>
+ */
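+
+/*
+ * Illustrative in-kernel usage of this interface (a sketch only; the
+ * request object and the values are made up for the example):
+ *
+ *	static struct pm_qos_request my_req;
+ *
+ *	pm_qos_add_request(&my_req, PM_QOS_CPU_DMA_LATENCY, 20);
+ *	...
+ *	pm_qos_update_request(&my_req, 50);
+ *	...
+ *	pm_qos_remove_request(&my_req);
+ */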
+
+/*#define DEBUG*/
+
+#include <linux/pm_qos.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+#include <linux/string.h>
+#include <linux/platform_device.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <linux/uaccess.h>
+#include <linux/export.h>
+#include <trace/events/power.h>
+
+/*
+ * locking rule: all changes to constraints or notifiers lists
+ * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock
+ * held, taken with _irqsave. One lock to rule them all
+ */
+struct pm_qos_object {
+ struct pm_qos_constraints *constraints;
+ struct miscdevice pm_qos_power_miscdev;
+ char *name;
+};
+
+static DEFINE_SPINLOCK(pm_qos_lock);
+
+static struct pm_qos_object null_pm_qos;
+
+static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier);
+static struct pm_qos_constraints cpu_dma_constraints = {
+ .list = PLIST_HEAD_INIT(cpu_dma_constraints.list),
+ .target_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE,
+ .default_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE,
+ .no_constraint_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE,
+ .type = PM_QOS_MIN,
+ .notifiers = &cpu_dma_lat_notifier,
+};
+static struct pm_qos_object cpu_dma_pm_qos = {
+ .constraints = &cpu_dma_constraints,
+ .name = "cpu_dma_latency",
+};
+
+static BLOCKING_NOTIFIER_HEAD(network_lat_notifier);
+static struct pm_qos_constraints network_lat_constraints = {
+ .list = PLIST_HEAD_INIT(network_lat_constraints.list),
+ .target_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE,
+ .default_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE,
+ .no_constraint_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE,
+ .type = PM_QOS_MIN,
+ .notifiers = &network_lat_notifier,
+};
+static struct pm_qos_object network_lat_pm_qos = {
+ .constraints = &network_lat_constraints,
+ .name = "network_latency",
+};
+
+
+static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier);
+static struct pm_qos_constraints network_tput_constraints = {
+ .list = PLIST_HEAD_INIT(network_tput_constraints.list),
+ .target_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE,
+ .default_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE,
+ .no_constraint_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE,
+ .type = PM_QOS_MAX,
+ .notifiers = &network_throughput_notifier,
+};
+static struct pm_qos_object network_throughput_pm_qos = {
+ .constraints = &network_tput_constraints,
+ .name = "network_throughput",
+};
+
+
+static BLOCKING_NOTIFIER_HEAD(memory_bandwidth_notifier);
+static struct pm_qos_constraints memory_bw_constraints = {
+ .list = PLIST_HEAD_INIT(memory_bw_constraints.list),
+ .target_value = PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE,
+ .default_value = PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE,
+ .no_constraint_value = PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE,
+ .type = PM_QOS_SUM,
+ .notifiers = &memory_bandwidth_notifier,
+};
+static struct pm_qos_object memory_bandwidth_pm_qos = {
+ .constraints = &memory_bw_constraints,
+ .name = "memory_bandwidth",
+};
+
+
+static struct pm_qos_object *pm_qos_array[] = {
+ &null_pm_qos,
+ &cpu_dma_pm_qos,
+ &network_lat_pm_qos,
+ &network_throughput_pm_qos,
+ &memory_bandwidth_pm_qos,
+};
+
+static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *f_pos);
+static ssize_t pm_qos_power_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *f_pos);
+static int pm_qos_power_open(struct inode *inode, struct file *filp);
+static int pm_qos_power_release(struct inode *inode, struct file *filp);
+
+static const struct file_operations pm_qos_power_fops = {
+ .write = pm_qos_power_write,
+ .read = pm_qos_power_read,
+ .open = pm_qos_power_open,
+ .release = pm_qos_power_release,
+ .llseek = noop_llseek,
+};
+
+/* unlocked internal variant */
+static inline int pm_qos_get_value(struct pm_qos_constraints *c)
+{
+ struct plist_node *node;
+ int total_value = 0;
+
+ if (plist_head_empty(&c->list))
+ return c->no_constraint_value;
+
+ switch (c->type) {
+ case PM_QOS_MIN:
+ return plist_first(&c->list)->prio;
+
+ case PM_QOS_MAX:
+ return plist_last(&c->list)->prio;
+
+ case PM_QOS_SUM:
+ plist_for_each(node, &c->list)
+ total_value += node->prio;
+
+ return total_value;
+
+ default:
+ /* runtime check for not using enum */
+ BUG();
+ return PM_QOS_DEFAULT_VALUE;
+ }
+}
+
+s32 pm_qos_read_value(struct pm_qos_constraints *c)
+{
+ return c->target_value;
+}
+
+static inline void pm_qos_set_value(struct pm_qos_constraints *c, s32 value)
+{
+ c->target_value = value;
+}
+
+static inline int pm_qos_get_value(struct pm_qos_constraints *c);
+static int pm_qos_dbg_show_requests(struct seq_file *s, void *unused)
+{
+ struct pm_qos_object *qos = (struct pm_qos_object *)s->private;
+ struct pm_qos_constraints *c;
+ struct pm_qos_request *req;
+ char *type;
+ unsigned long flags;
+ int tot_reqs = 0;
+ int active_reqs = 0;
+
+ if (IS_ERR_OR_NULL(qos)) {
+ pr_err("%s: bad qos param!\n", __func__);
+ return -EINVAL;
+ }
+ c = qos->constraints;
+ if (IS_ERR_OR_NULL(c)) {
+ pr_err("%s: Bad constraints on qos?\n", __func__);
+ return -EINVAL;
+ }
+
+ /* Lock to ensure we have a snapshot */
+ spin_lock_irqsave(&pm_qos_lock, flags);
+ if (plist_head_empty(&c->list)) {
+ seq_puts(s, "Empty!\n");
+ goto out;
+ }
+
+ switch (c->type) {
+ case PM_QOS_MIN:
+ type = "Minimum";
+ break;
+ case PM_QOS_MAX:
+ type = "Maximum";
+ break;
+ case PM_QOS_SUM:
+ type = "Sum";
+ break;
+ default:
+ type = "Unknown";
+ }
+
+ plist_for_each_entry(req, &c->list, node) {
+ char *state = "Default";
+
+ if ((req->node).prio != c->default_value) {
+ active_reqs++;
+ state = "Active";
+ }
+ tot_reqs++;
+ seq_printf(s, "%d: %d: %s\n", tot_reqs,
+ (req->node).prio, state);
+ }
+
+ seq_printf(s, "Type=%s, Value=%d, Requests: active=%d / total=%d\n",
+ type, pm_qos_get_value(c), active_reqs, tot_reqs);
+
+out:
+ spin_unlock_irqrestore(&pm_qos_lock, flags);
+ return 0;
+}
+
+static int pm_qos_dbg_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, pm_qos_dbg_show_requests,
+ inode->i_private);
+}
+
+static const struct file_operations pm_qos_debug_fops = {
+ .open = pm_qos_dbg_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+/**
+ * pm_qos_update_target - manages the constraints list and calls the notifiers
+ * if needed
+ * @c: constraints data struct
+ * @node: request to add to the list, to update or to remove
+ * @action: action to take on the constraints list
+ * @value: value of the request to add or update
+ *
+ * This function returns 1 if the aggregated constraint value has changed, 0
+ * otherwise.
+ */
+int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node,
+ enum pm_qos_req_action action, int value)
+{
+ unsigned long flags;
+ int prev_value, curr_value, new_value;
+ int ret;
+
+ spin_lock_irqsave(&pm_qos_lock, flags);
+ prev_value = pm_qos_get_value(c);
+ if (value == PM_QOS_DEFAULT_VALUE)
+ new_value = c->default_value;
+ else
+ new_value = value;
+
+ switch (action) {
+ case PM_QOS_REMOVE_REQ:
+ plist_del(node, &c->list);
+ break;
+ case PM_QOS_UPDATE_REQ:
+ /*
+ * to change the list, we atomically remove, reinit
+ * with new value and add, then see if the extremal
+ * changed
+ */
+ plist_del(node, &c->list);
+ case PM_QOS_ADD_REQ:
+ plist_node_init(node, new_value);
+ plist_add(node, &c->list);
+ break;
+ default:
+ /* no action */
+ ;
+ }
+
+ curr_value = pm_qos_get_value(c);
+ pm_qos_set_value(c, curr_value);
+
+ spin_unlock_irqrestore(&pm_qos_lock, flags);
+
+ trace_pm_qos_update_target(action, prev_value, curr_value);
+ if (prev_value != curr_value) {
+ ret = 1;
+ if (c->notifiers)
+ blocking_notifier_call_chain(c->notifiers,
+ (unsigned long)curr_value,
+ NULL);
+ } else {
+ ret = 0;
+ }
+ return ret;
+}
+
+/**
+ * pm_qos_flags_remove_req - Remove device PM QoS flags request.
+ * @pqf: Device PM QoS flags set to remove the request from.
+ * @req: Request to remove from the set.
+ */
+static void pm_qos_flags_remove_req(struct pm_qos_flags *pqf,
+ struct pm_qos_flags_request *req)
+{
+ s32 val = 0;
+
+ list_del(&req->node);
+ list_for_each_entry(req, &pqf->list, node)
+ val |= req->flags;
+
+ pqf->effective_flags = val;
+}
+
+/**
+ * pm_qos_update_flags - Update a set of PM QoS flags.
+ * @pqf: Set of flags to update.
+ * @req: Request to add to the set, to modify, or to remove from the set.
+ * @action: Action to take on the set.
+ * @val: Value of the request to add or modify.
+ *
+ * Update the given set of PM QoS flags and call notifiers if the aggregate
+ * value has changed. Returns 1 if the aggregate constraint value has changed,
+ * 0 otherwise.
+ */
+bool pm_qos_update_flags(struct pm_qos_flags *pqf,
+ struct pm_qos_flags_request *req,
+ enum pm_qos_req_action action, s32 val)
+{
+ unsigned long irqflags;
+ s32 prev_value, curr_value;
+
+ spin_lock_irqsave(&pm_qos_lock, irqflags);
+
+ prev_value = list_empty(&pqf->list) ? 0 : pqf->effective_flags;
+
+ switch (action) {
+ case PM_QOS_REMOVE_REQ:
+ pm_qos_flags_remove_req(pqf, req);
+ break;
+ case PM_QOS_UPDATE_REQ:
+ pm_qos_flags_remove_req(pqf, req);
+ case PM_QOS_ADD_REQ:
+ req->flags = val;
+ INIT_LIST_HEAD(&req->node);
+ list_add_tail(&req->node, &pqf->list);
+ pqf->effective_flags |= val;
+ break;
+ default:
+ /* no action */
+ ;
+ }
+
+ curr_value = list_empty(&pqf->list) ? 0 : pqf->effective_flags;
+
+ spin_unlock_irqrestore(&pm_qos_lock, irqflags);
+
+ trace_pm_qos_update_flags(action, prev_value, curr_value);
+ return prev_value != curr_value;
+}
+
+/**
+ * pm_qos_request - returns current system wide qos expectation
+ * @pm_qos_class: identification of which qos value is requested
+ *
+ * This function returns the current target value.
+ */
+int pm_qos_request(int pm_qos_class)
+{
+ return pm_qos_read_value(pm_qos_array[pm_qos_class]->constraints);
+}
+EXPORT_SYMBOL_GPL(pm_qos_request);
+
+int pm_qos_request_active(struct pm_qos_request *req)
+{
+ return req->pm_qos_class != 0;
+}
+EXPORT_SYMBOL_GPL(pm_qos_request_active);
+
+static void __pm_qos_update_request(struct pm_qos_request *req,
+ s32 new_value)
+{
+ trace_pm_qos_update_request(req->pm_qos_class, new_value);
+
+ if (new_value != req->node.prio)
+ pm_qos_update_target(
+ pm_qos_array[req->pm_qos_class]->constraints,
+ &req->node, PM_QOS_UPDATE_REQ, new_value);
+}
+
+/**
+ * pm_qos_work_fn - the timeout handler of pm_qos_update_request_timeout
+ * @work: work struct for the delayed work (timeout)
+ *
+ * This cancels the timeout request by falling back to the default at timeout.
+ */
+static void pm_qos_work_fn(struct work_struct *work)
+{
+ struct pm_qos_request *req = container_of(to_delayed_work(work),
+ struct pm_qos_request,
+ work);
+
+ __pm_qos_update_request(req, PM_QOS_DEFAULT_VALUE);
+}
+
+/**
+ * pm_qos_add_request - inserts new qos request into the list
+ * @req: pointer to a preallocated handle
+ * @pm_qos_class: identifies which list of qos request to use
+ * @value: defines the qos request
+ *
+ * This function inserts a new entry in the pm_qos_class list of requested qos
+ * performance characteristics. It recomputes the aggregate QoS expectations
+ * for the pm_qos_class of parameters and initializes the pm_qos_request
+ * handle. Caller needs to save this handle for later use in updates and
+ * removal.
+ */
+
+void pm_qos_add_request(struct pm_qos_request *req,
+ int pm_qos_class, s32 value)
+{
+ if (!req) /*guard against callers passing in null */
+ return;
+
+ if (pm_qos_request_active(req)) {
+ WARN(1, KERN_ERR "pm_qos_add_request() called for already added request\n");
+ return;
+ }
+ req->pm_qos_class = pm_qos_class;
+ INIT_DELAYED_WORK(&req->work, pm_qos_work_fn);
+ trace_pm_qos_add_request(pm_qos_class, value);
+ pm_qos_update_target(pm_qos_array[pm_qos_class]->constraints,
+ &req->node, PM_QOS_ADD_REQ, value);
+}
+EXPORT_SYMBOL_GPL(pm_qos_add_request);
+
+/**
+ * pm_qos_update_request - modifies an existing qos request
+ * @req : handle to list element holding a pm_qos request to use
+ * @value: defines the qos request
+ *
+ * Updates an existing qos request for the pm_qos_class of parameters along
+ * with updating the target pm_qos_class value.
+ *
+ * Attempts are made to make this code callable on hot code paths.
+ */
+void pm_qos_update_request(struct pm_qos_request *req,
+ s32 new_value)
+{
+ if (!req) /*guard against callers passing in null */
+ return;
+
+ if (!pm_qos_request_active(req)) {
+ WARN(1, KERN_ERR "pm_qos_update_request() called for unknown object\n");
+ return;
+ }
+
+ cancel_delayed_work_sync(&req->work);
+ __pm_qos_update_request(req, new_value);
+}
+EXPORT_SYMBOL_GPL(pm_qos_update_request);
+
+/**
+ * pm_qos_update_request_timeout - modifies an existing qos request temporarily.
+ * @req : handle to list element holding a pm_qos request to use
+ * @new_value: defines the temporary qos request
+ * @timeout_us: the effective duration of this qos request in usecs.
+ *
+ * After timeout_us, this qos request is cancelled automatically.
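+ *
+ * For example (illustrative; my_req and the values are made up), to hold a
+ * 100 usec latency constraint for roughly two seconds and then drop back to
+ * the default value:
+ *
+ *	pm_qos_update_request_timeout(&my_req, 100, 2 * USEC_PER_SEC);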
+ */
+void pm_qos_update_request_timeout(struct pm_qos_request *req, s32 new_value,
+ unsigned long timeout_us)
+{
+ if (!req)
+ return;
+ if (WARN(!pm_qos_request_active(req),
+ "%s called for unknown object.", __func__))
+ return;
+
+ cancel_delayed_work_sync(&req->work);
+
+ trace_pm_qos_update_request_timeout(req->pm_qos_class,
+ new_value, timeout_us);
+ if (new_value != req->node.prio)
+ pm_qos_update_target(
+ pm_qos_array[req->pm_qos_class]->constraints,
+ &req->node, PM_QOS_UPDATE_REQ, new_value);
+
+ schedule_delayed_work(&req->work, usecs_to_jiffies(timeout_us));
+}
+
+/**
+ * pm_qos_remove_request - modifies an existing qos request
+ * @req: handle to request list element
+ *
+ * Will remove pm qos request from the list of constraints and
+ * recompute the current target value for the pm_qos_class. Call this
+ * on slow code paths.
+ */
+void pm_qos_remove_request(struct pm_qos_request *req)
+{
+ if (!req) /*guard against callers passing in null */
+ return;
+ /* silent return to keep pcm code cleaner */
+
+ if (!pm_qos_request_active(req)) {
+ WARN(1, KERN_ERR "pm_qos_remove_request() called for unknown object\n");
+ return;
+ }
+
+ cancel_delayed_work_sync(&req->work);
+
+ trace_pm_qos_remove_request(req->pm_qos_class, PM_QOS_DEFAULT_VALUE);
+ pm_qos_update_target(pm_qos_array[req->pm_qos_class]->constraints,
+ &req->node, PM_QOS_REMOVE_REQ,
+ PM_QOS_DEFAULT_VALUE);
+ memset(req, 0, sizeof(*req));
+}
+EXPORT_SYMBOL_GPL(pm_qos_remove_request);
+
+/**
+ * pm_qos_add_notifier - sets notification entry for changes to target value
+ * @pm_qos_class: identifies which qos target changes should be notified.
+ * @notifier: notifier block managed by caller.
+ *
+ * will register the notifier into a notification chain that gets called
+ * upon changes to the pm_qos_class target value.
+ */
+int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier)
+{
+ int retval;
+
+ retval = blocking_notifier_chain_register(
+ pm_qos_array[pm_qos_class]->constraints->notifiers,
+ notifier);
+
+ return retval;
+}
+EXPORT_SYMBOL_GPL(pm_qos_add_notifier);
+
+/**
+ * pm_qos_remove_notifier - deletes notification entry from chain.
+ * @pm_qos_class: identifies which qos target changes are notified.
+ * @notifier: notifier block to be removed.
+ *
+ * will remove the notifier from the notification chain that gets called
+ * upon changes to the pm_qos_class target value.
+ */
+int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier)
+{
+ int retval;
+
+ retval = blocking_notifier_chain_unregister(
+ pm_qos_array[pm_qos_class]->constraints->notifiers,
+ notifier);
+
+ return retval;
+}
+EXPORT_SYMBOL_GPL(pm_qos_remove_notifier);
+
+/* User space interface to PM QoS classes via misc devices */
+static int register_pm_qos_misc(struct pm_qos_object *qos, struct dentry *d)
+{
+ qos->pm_qos_power_miscdev.minor = MISC_DYNAMIC_MINOR;
+ qos->pm_qos_power_miscdev.name = qos->name;
+ qos->pm_qos_power_miscdev.fops = &pm_qos_power_fops;
+
+ if (d) {
+ (void)debugfs_create_file(qos->name, S_IRUGO, d,
+ (void *)qos, &pm_qos_debug_fops);
+ }
+
+ return misc_register(&qos->pm_qos_power_miscdev);
+}
+
+static int find_pm_qos_object_by_minor(int minor)
+{
+ int pm_qos_class;
+
+ for (pm_qos_class = PM_QOS_CPU_DMA_LATENCY;
+ pm_qos_class < PM_QOS_NUM_CLASSES; pm_qos_class++) {
+ if (minor ==
+ pm_qos_array[pm_qos_class]->pm_qos_power_miscdev.minor)
+ return pm_qos_class;
+ }
+ return -1;
+}
+
+static int pm_qos_power_open(struct inode *inode, struct file *filp)
+{
+ long pm_qos_class;
+
+ pm_qos_class = find_pm_qos_object_by_minor(iminor(inode));
+ if (pm_qos_class >= PM_QOS_CPU_DMA_LATENCY) {
+ struct pm_qos_request *req = kzalloc(sizeof(*req), GFP_KERNEL);
+ if (!req)
+ return -ENOMEM;
+
+ pm_qos_add_request(req, pm_qos_class, PM_QOS_DEFAULT_VALUE);
+ filp->private_data = req;
+
+ return 0;
+ }
+ return -EPERM;
+}
+
+static int pm_qos_power_release(struct inode *inode, struct file *filp)
+{
+ struct pm_qos_request *req;
+
+ req = filp->private_data;
+ pm_qos_remove_request(req);
+ kfree(req);
+
+ return 0;
+}
+
+
+static ssize_t pm_qos_power_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *f_pos)
+{
+ s32 value;
+ unsigned long flags;
+ struct pm_qos_request *req = filp->private_data;
+
+ if (!req)
+ return -EINVAL;
+ if (!pm_qos_request_active(req))
+ return -EINVAL;
+
+ spin_lock_irqsave(&pm_qos_lock, flags);
+ value = pm_qos_get_value(pm_qos_array[req->pm_qos_class]->constraints);
+ spin_unlock_irqrestore(&pm_qos_lock, flags);
+
+ return simple_read_from_buffer(buf, count, f_pos, &value, sizeof(s32));
+}
+
+static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *f_pos)
+{
+ s32 value;
+ struct pm_qos_request *req;
+
+ if (count == sizeof(s32)) {
+ if (copy_from_user(&value, buf, sizeof(s32)))
+ return -EFAULT;
+ } else {
+ int ret;
+
+ ret = kstrtos32_from_user(buf, count, 16, &value);
+ if (ret)
+ return ret;
+ }
+
+ req = filp->private_data;
+ pm_qos_update_request(req, value);
+
+ return count;
+}
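+
+/*
+ * Illustrative user-space view (sketch, not part of this file): each class
+ * is exposed as a misc device named after the class, e.g.
+ * /dev/cpu_dma_latency for the CPU DMA latency class.  Opening the device
+ * adds a request at PM_QOS_DEFAULT_VALUE, writing a raw binary s32 (or an
+ * ASCII hex string) updates it, and closing the file removes it:
+ *
+ *	int fd = open("/dev/cpu_dma_latency", O_RDWR);
+ *	int32_t latency_us = 20;
+ *
+ *	write(fd, &latency_us, sizeof(latency_us));
+ *	... latency-sensitive work; the request lives until close(fd) ...
+ */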
+
+
+static int __init pm_qos_power_init(void)
+{
+ int ret = 0;
+ int i;
+ struct dentry *d;
+
+ BUILD_BUG_ON(ARRAY_SIZE(pm_qos_array) != PM_QOS_NUM_CLASSES);
+
+ d = debugfs_create_dir("pm_qos", NULL);
+ if (IS_ERR_OR_NULL(d))
+ d = NULL;
+
+ for (i = PM_QOS_CPU_DMA_LATENCY; i < PM_QOS_NUM_CLASSES; i++) {
+ ret = register_pm_qos_misc(pm_qos_array[i], d);
+ if (ret < 0) {
+ printk(KERN_ERR "pm_qos_param: %s setup failed\n",
+ pm_qos_array[i]->name);
+ return ret;
+ }
+ }
+
+ return ret;
+}
+
+late_initcall(pm_qos_power_init);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
new file mode 100644
index 000000000..ba9d20ebc
--- /dev/null
+++ b/kernel/power/snapshot.c
@@ -0,0 +1,2722 @@
+/*
+ * linux/kernel/power/snapshot.c
+ *
+ * This file provides system snapshot/restore functionality for swsusp.
+ *
+ * Copyright (C) 1998-2005 Pavel Machek <pavel@ucw.cz>
+ * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
+ *
+ * This file is released under the GPLv2.
+ *
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/suspend.h>
+#include <linux/delay.h>
+#include <linux/bitops.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/pm.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/syscalls.h>
+#include <linux/console.h>
+#include <linux/highmem.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/compiler.h>
+#include <linux/ktime.h>
+
+#include <asm/uaccess.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include <asm/io.h>
+
+#include "tuxonice_modules.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_alloc.h"
+#include "power.h"
+
+static int swsusp_page_is_free(struct page *);
+static void swsusp_set_page_forbidden(struct page *);
+static void swsusp_unset_page_forbidden(struct page *);
+
+/*
+ * Number of bytes to reserve for memory allocations made by device drivers
+ * from their ->freeze() and ->freeze_noirq() callbacks so that they don't
+ * cause image creation to fail (tunable via /sys/power/reserved_size).
+ */
+unsigned long reserved_size;
+
+void __init hibernate_reserved_size_init(void)
+{
+ reserved_size = SPARE_PAGES * PAGE_SIZE;
+}
+
+/*
+ * Preferred image size in bytes (tunable via /sys/power/image_size).
+ * When it is set to N, swsusp will do its best to ensure the image
+ * size will not exceed N bytes, but if that is impossible, it will
+ * try to create the smallest image possible.
+ */
+unsigned long image_size;
+
+void __init hibernate_image_size_init(void)
+{
+ image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE;
+}
+
+/* List of PBEs needed for restoring the pages that were allocated before
+ * the suspend and included in the suspend image, but have also been
+ * allocated by the "resume" kernel, so their contents cannot be written
+ * directly to their "original" page frames.
+ */
+struct pbe *restore_pblist;
+
+/* Pointer to an auxiliary buffer (1 page) */
+static void *buffer;
+
+/**
+ * @safe_needed - on resume, for storing the PBE list and the image,
+ * we can only use memory pages that do not conflict with the pages
+ * used before suspend. The unsafe pages have PageNosaveFree set
+ * and we count them using allocated_unsafe_pages.
+ *
+ * Each allocated image page is marked as PageNosave and PageNosaveFree
+ * so that swsusp_free() can release it.
+ */
+
+#define PG_ANY 0
+#define PG_SAFE 1
+#define PG_UNSAFE_CLEAR 1
+#define PG_UNSAFE_KEEP 0
+
+static unsigned int allocated_unsafe_pages;
+
+static void *get_image_page(gfp_t gfp_mask, int safe_needed)
+{
+ void *res;
+
+ if (toi_running)
+ return (void *) toi_get_nonconflicting_page();
+
+ res = (void *)get_zeroed_page(gfp_mask);
+ if (safe_needed)
+ while (res && swsusp_page_is_free(virt_to_page(res))) {
+ /* The page is unsafe, mark it for swsusp_free() */
+ swsusp_set_page_forbidden(virt_to_page(res));
+ allocated_unsafe_pages++;
+ res = (void *)get_zeroed_page(gfp_mask);
+ }
+ if (res) {
+ swsusp_set_page_forbidden(virt_to_page(res));
+ swsusp_set_page_free(virt_to_page(res));
+ }
+ return res;
+}
+
+unsigned long get_safe_page(gfp_t gfp_mask)
+{
+ return (unsigned long)get_image_page(gfp_mask, PG_SAFE);
+}
+
+static struct page *alloc_image_page(gfp_t gfp_mask)
+{
+ struct page *page;
+
+ page = alloc_page(gfp_mask);
+ if (page) {
+ swsusp_set_page_forbidden(page);
+ swsusp_set_page_free(page);
+ }
+ return page;
+}
+
+/**
+ * free_image_page - free page represented by @addr, allocated with
+ * get_image_page (page flags set by it must be cleared)
+ */
+
+static inline void free_image_page(void *addr, int clear_nosave_free)
+{
+ struct page *page;
+
+ BUG_ON(!virt_addr_valid(addr));
+
+ page = virt_to_page(addr);
+
+ if (toi_running) {
+ toi__free_page(29, page);
+ return;
+ }
+
+ swsusp_unset_page_forbidden(page);
+ if (clear_nosave_free)
+ swsusp_unset_page_free(page);
+
+ __free_page(page);
+}
+
+/* struct linked_page is used to build chains of pages */
+
+#define LINKED_PAGE_DATA_SIZE (PAGE_SIZE - sizeof(void *))
+
+struct linked_page {
+ struct linked_page *next;
+ char data[LINKED_PAGE_DATA_SIZE];
+} __packed;
+
+static inline void
+free_list_of_pages(struct linked_page *list, int clear_page_nosave)
+{
+ while (list) {
+ struct linked_page *lp = list->next;
+
+ free_image_page(list, clear_page_nosave);
+ list = lp;
+ }
+}
+
+/**
+ * struct chain_allocator is used for allocating small objects out of
+ * a linked list of pages called 'the chain'.
+ *
+ * The chain grows each time there is no room for a new object in
+ * the current page. The allocated objects cannot be freed individually.
+ * It is only possible to free them all at once, by freeing the entire
+ * chain.
+ *
+ * NOTE: The chain allocator may be inefficient if the allocated objects
+ * are not much smaller than PAGE_SIZE.
+ */
+
+struct chain_allocator {
+ struct linked_page *chain; /* the chain */
+ unsigned int used_space; /* total size of objects allocated out
+ * of the current page
+ */
+ gfp_t gfp_mask; /* mask for allocating pages */
+ int safe_needed; /* if set, only "safe" pages are allocated */
+};
+
+static void
+chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed)
+{
+ ca->chain = NULL;
+ ca->used_space = LINKED_PAGE_DATA_SIZE;
+ ca->gfp_mask = gfp_mask;
+ ca->safe_needed = safe_needed;
+}
+
+static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
+{
+ void *ret;
+
+ if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) {
+ struct linked_page *lp;
+
+ lp = get_image_page(ca->gfp_mask, ca->safe_needed);
+ if (!lp)
+ return NULL;
+
+ lp->next = ca->chain;
+ ca->chain = lp;
+ ca->used_space = 0;
+ }
+ ret = ca->chain->data + ca->used_space;
+ ca->used_space += size;
+ return ret;
+}
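+
+/*
+ * Illustrative usage sketch (assumed, not copied from a caller in this
+ * file): objects are carved out of the chain one after another and can
+ * only be released by freeing the whole chain, e.g.
+ *
+ *	struct chain_allocator ca;
+ *	void *obj;
+ *
+ *	chain_init(&ca, GFP_KERNEL, PG_ANY);
+ *	obj = chain_alloc(&ca, 64);
+ *	...
+ *	free_list_of_pages(ca.chain, PG_UNSAFE_KEEP);
+ */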
+
+/**
+ * Data types related to memory bitmaps.
+ *
+ * Memory bitmap is a structure consisting of many linked lists of
+ * objects. The main list's elements are of type struct zone_bitmap
+ * and each of them corresponds to one zone. For each zone bitmap
+ * object there is a list of objects of type struct bm_block that
+ * represent the blocks of the bitmap in which information is stored.
+ *
+ * struct memory_bitmap contains a pointer to the main list of zone
+ * bitmap objects, a struct bm_position used for browsing the bitmap,
+ * and a pointer to the list of pages used for allocating all of the
+ * zone bitmap objects and bitmap block objects.
+ *
+ * NOTE: It has to be possible to lay out the bitmap in memory
+ * using only allocations of order 0. Additionally, the bitmap is
+ * designed to work with an arbitrary number of zones (this is over the
+ * top for now, but let's avoid making unnecessary assumptions ;-).
+ *
+ * struct zone_bitmap contains a pointer to a list of bitmap block
+ * objects and a pointer to the bitmap block object that has been
+ * most recently used for setting bits. Additionally, it contains the
+ * pfns that correspond to the start and end of the represented zone.
+ *
+ * struct bm_block contains a pointer to the memory page in which
+ * information is stored (in the form of a block of bitmap)
+ * It also contains the pfns that correspond to the start and end of
+ * the represented memory area.
+ *
+ * The memory bitmap is organized as a radix tree to guarantee fast random
+ * access to the bits. There is one radix tree for each zone (as returned
+ * from create_mem_extents).
+ *
+ * One radix tree is represented by one struct mem_zone_bm_rtree. There are
+ * two linked lists for the nodes of the tree, one for the inner nodes and
+ * one for the leaf nodes. The linked leaf nodes are used for fast linear
+ * access of the memory bitmap.
+ *
+ * The struct rtree_node represents one node of the radix tree.
+ */
+
+#define BM_END_OF_MAP (~0UL)
+
+#define BM_BITS_PER_BLOCK (PAGE_SIZE * BITS_PER_BYTE)
+#define BM_BLOCK_SHIFT (PAGE_SHIFT + 3)
+#define BM_BLOCK_MASK ((1UL << BM_BLOCK_SHIFT) - 1)
+
+/*
+ * struct rtree_node is a wrapper struct to link the nodes
+ * of the rtree together for easy linear iteration over
+ * bits and easy freeing
+ */
+struct rtree_node {
+ struct list_head list;
+ unsigned long *data;
+};
+
+/*
+ * struct mem_zone_bm_rtree represents a bitmap used for one
+ * populated memory zone.
+ */
+struct mem_zone_bm_rtree {
+ struct list_head list; /* Link Zones together */
+ struct list_head nodes; /* Radix Tree inner nodes */
+ struct list_head leaves; /* Radix Tree leaves */
+ unsigned long start_pfn; /* Zone start page frame */
+ unsigned long end_pfn; /* Zone end page frame + 1 */
+ struct rtree_node *rtree; /* Radix Tree Root */
+ int levels; /* Number of Radix Tree Levels */
+ unsigned int blocks; /* Number of Bitmap Blocks */
+};
+
+/* struct bm_position is used for browsing memory bitmaps */
+
+struct bm_position {
+ struct mem_zone_bm_rtree *zone;
+ struct rtree_node *node;
+ unsigned long node_pfn;
+ int node_bit;
+};
+
+#define BM_POSITION_SLOTS (NR_CPUS * 2)
+
+struct memory_bitmap {
+ struct list_head zones;
+ struct linked_page *p_list; /* list of pages used to store zone
+ * bitmap objects and bitmap block
+ * objects
+ */
+ struct bm_position cur[BM_POSITION_SLOTS]; /* most recently used bit position */
+};
+
+/* Functions that operate on memory bitmaps */
+
+#define BM_ENTRIES_PER_LEVEL (PAGE_SIZE / sizeof(unsigned long))
+#if BITS_PER_LONG == 32
+#define BM_RTREE_LEVEL_SHIFT (PAGE_SHIFT - 2)
+#else
+#define BM_RTREE_LEVEL_SHIFT (PAGE_SHIFT - 3)
+#endif
+#define BM_RTREE_LEVEL_MASK ((1UL << BM_RTREE_LEVEL_SHIFT) - 1)
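+
+/*
+ * Worked example (assuming 4 KiB pages on a 64-bit machine): each bitmap
+ * block covers BM_BITS_PER_BLOCK = 4096 * 8 = 32768 pfns, so a pfn's block
+ * number within its zone is (pfn - start_pfn) >> BM_BLOCK_SHIFT with
+ * BM_BLOCK_SHIFT = 15.  One rtree node page holds BM_ENTRIES_PER_LEVEL =
+ * 4096 / 8 = 512 pointers, i.e. BM_RTREE_LEVEL_SHIFT = 9 index bits are
+ * consumed per tree level when walking down to the leaf block.
+ */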
+
+/*
+ * alloc_rtree_node - Allocate a new node and add it to the radix tree.
+ *
+ * This function is used to allocate inner nodes as well as the
+ * leaf nodes of the radix tree. It also adds the node to the
+ * corresponding linked list passed in by the *list parameter.
+ */
+static struct rtree_node *alloc_rtree_node(gfp_t gfp_mask, int safe_needed,
+ struct chain_allocator *ca,
+ struct list_head *list)
+{
+ struct rtree_node *node;
+
+ node = chain_alloc(ca, sizeof(struct rtree_node));
+ if (!node)
+ return NULL;
+
+ node->data = get_image_page(gfp_mask, safe_needed);
+ if (!node->data)
+ return NULL;
+
+ list_add_tail(&node->list, list);
+
+ return node;
+}
+
+/*
+ * add_rtree_block - Add a new leaf node to the radix tree
+ *
+ * The leaf nodes need to be allocated in order to keep the leaves
+ * linked list in order. This is guaranteed by the zone->blocks
+ * counter.
+ */
+static int add_rtree_block(struct mem_zone_bm_rtree *zone, gfp_t gfp_mask,
+ int safe_needed, struct chain_allocator *ca)
+{
+ struct rtree_node *node, *block, **dst;
+ unsigned int levels_needed, block_nr;
+ int i;
+
+ block_nr = zone->blocks;
+ levels_needed = 0;
+
+ /* How many levels do we need for this block nr? */
+ while (block_nr) {
+ levels_needed += 1;
+ block_nr >>= BM_RTREE_LEVEL_SHIFT;
+ }
+
+ /* Make sure the rtree has enough levels */
+ for (i = zone->levels; i < levels_needed; i++) {
+ node = alloc_rtree_node(gfp_mask, safe_needed, ca,
+ &zone->nodes);
+ if (!node)
+ return -ENOMEM;
+
+ node->data[0] = (unsigned long)zone->rtree;
+ zone->rtree = node;
+ zone->levels += 1;
+ }
+
+ /* Allocate new block */
+ block = alloc_rtree_node(gfp_mask, safe_needed, ca, &zone->leaves);
+ if (!block)
+ return -ENOMEM;
+
+ /* Now walk the rtree to insert the block */
+ node = zone->rtree;
+ dst = &zone->rtree;
+ block_nr = zone->blocks;
+ for (i = zone->levels; i > 0; i--) {
+ int index;
+
+ if (!node) {
+ node = alloc_rtree_node(gfp_mask, safe_needed, ca,
+ &zone->nodes);
+ if (!node)
+ return -ENOMEM;
+ *dst = node;
+ }
+
+ index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT);
+ index &= BM_RTREE_LEVEL_MASK;
+ dst = (struct rtree_node **)&((*dst)->data[index]);
+ node = *dst;
+ }
+
+ zone->blocks += 1;
+ *dst = block;
+
+ return 0;
+}
+
+static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone,
+ int clear_nosave_free);
+
+/*
+ * create_zone_bm_rtree - create a radix tree for one zone
+ *
+ * Allocates the mem_zone_bm_rtree structure and initializes it.
+ * This function also allocates and builds the radix tree for the
+ * zone.
+ */
+static struct mem_zone_bm_rtree *
+create_zone_bm_rtree(gfp_t gfp_mask, int safe_needed,
+ struct chain_allocator *ca,
+ unsigned long start, unsigned long end)
+{
+ struct mem_zone_bm_rtree *zone;
+ unsigned int i, nr_blocks;
+ unsigned long pages;
+
+ pages = end - start;
+ zone = chain_alloc(ca, sizeof(struct mem_zone_bm_rtree));
+ if (!zone)
+ return NULL;
+
+ INIT_LIST_HEAD(&zone->nodes);
+ INIT_LIST_HEAD(&zone->leaves);
+ zone->start_pfn = start;
+ zone->end_pfn = end;
+ nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK);
+
+ for (i = 0; i < nr_blocks; i++) {
+ if (add_rtree_block(zone, gfp_mask, safe_needed, ca)) {
+ free_zone_bm_rtree(zone, PG_UNSAFE_CLEAR);
+ return NULL;
+ }
+ }
+
+ return zone;
+}
+
+/*
+ * free_zone_bm_rtree - Free the memory of the radix tree
+ *
+ * Free all node pages of the radix tree. The mem_zone_bm_rtree
+ * structure itself is not freed here nor are the rtree_node
+ * structs.
+ */
+static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone,
+ int clear_nosave_free)
+{
+ struct rtree_node *node;
+
+ list_for_each_entry(node, &zone->nodes, list)
+ free_image_page(node->data, clear_nosave_free);
+
+ list_for_each_entry(node, &zone->leaves, list)
+ free_image_page(node->data, clear_nosave_free);
+}
+
+void memory_bm_position_reset(struct memory_bitmap *bm)
+{
+ int index;
+
+ for (index = 0; index < BM_POSITION_SLOTS; index++) {
+ bm->cur[index].zone = list_entry(bm->zones.next, struct mem_zone_bm_rtree,
+ list);
+ bm->cur[index].node = list_entry(bm->cur[index].zone->leaves.next,
+ struct rtree_node, list);
+ bm->cur[index].node_pfn = 0;
+ bm->cur[index].node_bit = 0;
+ }
+}
+
+static void memory_bm_clear_current(struct memory_bitmap *bm, int index);
+unsigned long memory_bm_next_pfn(struct memory_bitmap *bm, int index);
+
+/**
+ * memory_bm_clear - clear all bits in the bitmap
+ * @bm: The bitmap to clear
+ *
+ * Only run while single threaded - locking not needed
+ */
+void memory_bm_clear(struct memory_bitmap *bm)
+{
+ memory_bm_position_reset(bm);
+
+ while (memory_bm_next_pfn(bm, 0) != BM_END_OF_MAP) {
+ memory_bm_clear_current(bm, 0);
+ }
+
+ memory_bm_position_reset(bm);
+}
+static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
+
+struct mem_extent {
+ struct list_head hook;
+ unsigned long start;
+ unsigned long end;
+};
+
+/**
+ * free_mem_extents - free a list of memory extents
+ * @list - list of extents to empty
+ */
+static void free_mem_extents(struct list_head *list)
+{
+ struct mem_extent *ext, *aux;
+
+ list_for_each_entry_safe(ext, aux, list, hook) {
+ list_del(&ext->hook);
+ kfree(ext);
+ }
+}
+
+/**
+ * create_mem_extents - create a list of memory extents representing
+ * contiguous ranges of PFNs
+ * @list - list to put the extents into
+ * @gfp_mask - mask to use for memory allocations
+ */
+static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
+{
+ struct zone *zone;
+
+ INIT_LIST_HEAD(list);
+
+ for_each_populated_zone(zone) {
+ unsigned long zone_start, zone_end;
+ struct mem_extent *ext, *cur, *aux;
+
+ zone_start = zone->zone_start_pfn;
+ zone_end = zone_end_pfn(zone);
+
+ list_for_each_entry(ext, list, hook)
+ if (zone_start <= ext->end)
+ break;
+
+ if (&ext->hook == list || zone_end < ext->start) {
+ /* New extent is necessary */
+ struct mem_extent *new_ext;
+
+ new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask);
+ if (!new_ext) {
+ free_mem_extents(list);
+ return -ENOMEM;
+ }
+ new_ext->start = zone_start;
+ new_ext->end = zone_end;
+ list_add_tail(&new_ext->hook, &ext->hook);
+ continue;
+ }
+
+ /* Merge this zone's range of PFNs with the existing one */
+ if (zone_start < ext->start)
+ ext->start = zone_start;
+ if (zone_end > ext->end)
+ ext->end = zone_end;
+
+ /* More merging may be possible */
+ cur = ext;
+ list_for_each_entry_safe_continue(cur, aux, list, hook) {
+ if (zone_end < cur->start)
+ break;
+ if (zone_end < cur->end)
+ ext->end = cur->end;
+ list_del(&cur->hook);
+ kfree(cur);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * memory_bm_create - allocate memory for a memory bitmap
+ */
+static int
+memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
+{
+ struct chain_allocator ca;
+ struct list_head mem_extents;
+ struct mem_extent *ext;
+ int error;
+
+ chain_init(&ca, gfp_mask, safe_needed);
+ INIT_LIST_HEAD(&bm->zones);
+
+ error = create_mem_extents(&mem_extents, gfp_mask);
+ if (error)
+ return error;
+
+ list_for_each_entry(ext, &mem_extents, hook) {
+ struct mem_zone_bm_rtree *zone;
+
+ zone = create_zone_bm_rtree(gfp_mask, safe_needed, &ca,
+ ext->start, ext->end);
+ if (!zone) {
+ error = -ENOMEM;
+ goto Error;
+ }
+ list_add_tail(&zone->list, &bm->zones);
+ }
+
+ bm->p_list = ca.chain;
+
+ memory_bm_position_reset(bm);
+ Exit:
+ free_mem_extents(&mem_extents);
+ return error;
+
+ Error:
+ bm->p_list = ca.chain;
+ memory_bm_free(bm, PG_UNSAFE_CLEAR);
+ goto Exit;
+}
+
+/**
+ * memory_bm_free - free memory occupied by the memory bitmap @bm
+ */
+static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
+{
+ struct mem_zone_bm_rtree *zone;
+
+ list_for_each_entry(zone, &bm->zones, list)
+ free_zone_bm_rtree(zone, clear_nosave_free);
+
+ free_list_of_pages(bm->p_list, clear_nosave_free);
+
+ INIT_LIST_HEAD(&bm->zones);
+}
+
+/**
+ * memory_bm_find_bit - Find the bit for pfn in the memory
+ * bitmap
+ *
+ * Find the bit in the bitmap @bm that corresponds to the given pfn.
+ * The cur.zone, cur.node and cur.node_pfn members of @bm are
+ * updated.
+ * It walks the radix tree to find the leaf page which contains the bit
+ * for the pfn, returns the page address in *addr and the bit's offset
+ * within that page in *bit_nr.
+ */
+int memory_bm_find_bit(struct memory_bitmap *bm, int index,
+ unsigned long pfn, void **addr, unsigned int *bit_nr)
+{
+ struct mem_zone_bm_rtree *curr, *zone;
+ struct rtree_node *node;
+ int i, block_nr;
+
+ if (!bm->cur[index].zone) {
+		/* Reset the iteration state for this slot */
+ bm->cur[index].zone = list_entry(bm->zones.next, struct mem_zone_bm_rtree,
+ list);
+ bm->cur[index].node = list_entry(bm->cur[index].zone->leaves.next,
+ struct rtree_node, list);
+ bm->cur[index].node_pfn = 0;
+ bm->cur[index].node_bit = 0;
+ }
+
+ zone = bm->cur[index].zone;
+
+ if (pfn >= zone->start_pfn && pfn < zone->end_pfn)
+ goto zone_found;
+
+ zone = NULL;
+
+ /* Find the right zone */
+ list_for_each_entry(curr, &bm->zones, list) {
+ if (pfn >= curr->start_pfn && pfn < curr->end_pfn) {
+ zone = curr;
+ break;
+ }
+ }
+
+ if (!zone)
+ return -EFAULT;
+
+zone_found:
+ /*
+	 * We have a zone. Now walk the radix tree to find the leaf
+ * node for our pfn.
+ */
+
+ node = bm->cur[index].node;
+ if (((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur[index].node_pfn)
+ goto node_found;
+
+ node = zone->rtree;
+ block_nr = (pfn - zone->start_pfn) >> BM_BLOCK_SHIFT;
+
+ for (i = zone->levels; i > 0; i--) {
+ int index;
+
+ index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT);
+ index &= BM_RTREE_LEVEL_MASK;
+ BUG_ON(node->data[index] == 0);
+ node = (struct rtree_node *)node->data[index];
+ }
+
+node_found:
+ /* Update last position */
+ bm->cur[index].zone = zone;
+ bm->cur[index].node = node;
+ bm->cur[index].node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK;
+
+ /* Set return values */
+ *addr = node->data;
+ *bit_nr = (pfn - zone->start_pfn) & BM_BLOCK_MASK;
+
+ return 0;
+}
+
+void memory_bm_set_bit(struct memory_bitmap *bm, int index, unsigned long pfn)
+{
+ void *addr;
+ unsigned int bit;
+ int error;
+
+ error = memory_bm_find_bit(bm, index, pfn, &addr, &bit);
+ BUG_ON(error);
+ set_bit(bit, addr);
+}
+
+int mem_bm_set_bit_check(struct memory_bitmap *bm, int index, unsigned long pfn)
+{
+ void *addr;
+ unsigned int bit;
+ int error;
+
+ error = memory_bm_find_bit(bm, index, pfn, &addr, &bit);
+ if (!error)
+ set_bit(bit, addr);
+
+ return error;
+}
+
+void memory_bm_clear_bit(struct memory_bitmap *bm, int index, unsigned long pfn)
+{
+ void *addr;
+ unsigned int bit;
+ int error;
+
+ error = memory_bm_find_bit(bm, index, pfn, &addr, &bit);
+ BUG_ON(error);
+ clear_bit(bit, addr);
+}
+
+static void memory_bm_clear_current(struct memory_bitmap *bm, int index)
+{
+ int bit;
+
+ bit = max(bm->cur[index].node_bit - 1, 0);
+ clear_bit(bit, bm->cur[index].node->data);
+}
+
+int memory_bm_test_bit(struct memory_bitmap *bm, int index, unsigned long pfn)
+{
+ void *addr;
+ unsigned int bit;
+ int error;
+
+ error = memory_bm_find_bit(bm, index, pfn, &addr, &bit);
+ BUG_ON(error);
+ return test_bit(bit, addr);
+}
+
+static bool memory_bm_pfn_present(struct memory_bitmap *bm, int index, unsigned long pfn)
+{
+ void *addr;
+ unsigned int bit;
+
+ return !memory_bm_find_bit(bm, index, pfn, &addr, &bit);
+}
+
+/*
+ * rtree_next_node - Jumps to the next leaf node
+ *
+ * Sets the position to the beginning of the next node in the
+ * memory bitmap. This is either the next node in the current
+ * zone's radix tree or the first node in the radix tree of the
+ * next zone.
+ *
+ * Returns true if there is a next node, false otherwise.
+ */
+static bool rtree_next_node(struct memory_bitmap *bm, int index)
+{
+ bm->cur[index].node = list_entry(bm->cur[index].node->list.next,
+ struct rtree_node, list);
+ if (&bm->cur[index].node->list != &bm->cur[index].zone->leaves) {
+ bm->cur[index].node_pfn += BM_BITS_PER_BLOCK;
+ bm->cur[index].node_bit = 0;
+ touch_softlockup_watchdog();
+ return true;
+ }
+
+ /* No more nodes, goto next zone */
+ bm->cur[index].zone = list_entry(bm->cur[index].zone->list.next,
+ struct mem_zone_bm_rtree, list);
+ if (&bm->cur[index].zone->list != &bm->zones) {
+ bm->cur[index].node = list_entry(bm->cur[index].zone->leaves.next,
+ struct rtree_node, list);
+ bm->cur[index].node_pfn = 0;
+ bm->cur[index].node_bit = 0;
+ return true;
+ }
+
+ /* No more zones */
+ return false;
+}
+
+/**
+ * memory_bm_next_pfn - Find the next set bit in the bitmap @bm
+ *
+ * Starting from the last returned position this function searches
+ * for the next set bit in the memory bitmap and returns its
+ * number. If no more bits are set, BM_END_OF_MAP is returned.
+ *
+ * It is required to run memory_bm_position_reset() before the
+ * first call to this function.
+ */
+unsigned long memory_bm_next_pfn(struct memory_bitmap *bm, int index)
+{
+ unsigned long bits, pfn, pages;
+ int bit;
+
+ index += NR_CPUS; /* Iteration state is separated from get/set/test */
+
+ do {
+ pages = bm->cur[index].zone->end_pfn - bm->cur[index].zone->start_pfn;
+ bits = min(pages - bm->cur[index].node_pfn, BM_BITS_PER_BLOCK);
+ bit = find_next_bit(bm->cur[index].node->data, bits,
+ bm->cur[index].node_bit);
+ if (bit < bits) {
+ pfn = bm->cur[index].zone->start_pfn + bm->cur[index].node_pfn + bit;
+ bm->cur[index].node_bit = bit + 1;
+ return pfn;
+ }
+ } while (rtree_next_node(bm, index));
+
+ return BM_END_OF_MAP;
+}
+
+LIST_HEAD(nosave_regions);
+
+/**
+ * register_nosave_region - register a range of page frames the contents
+ * of which should not be saved during the suspend (to be used in the early
+ * initialization code)
+ */
+
+void __init
+__register_nosave_region(unsigned long start_pfn, unsigned long end_pfn,
+ int use_kmalloc)
+{
+ struct nosave_region *region;
+
+ if (start_pfn >= end_pfn)
+ return;
+
+ if (!list_empty(&nosave_regions)) {
+ /* Try to extend the previous region (they should be sorted) */
+ region = list_entry(nosave_regions.prev,
+ struct nosave_region, list);
+ if (region->end_pfn == start_pfn) {
+ region->end_pfn = end_pfn;
+ goto Report;
+ }
+ }
+ if (use_kmalloc) {
+ /* during init, this shouldn't fail */
+ region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL);
+ BUG_ON(!region);
+ } else
+ /* This allocation cannot fail */
+ region = memblock_virt_alloc(sizeof(struct nosave_region), 0);
+ region->start_pfn = start_pfn;
+ region->end_pfn = end_pfn;
+ list_add_tail(&region->list, &nosave_regions);
+ Report:
+ printk(KERN_INFO "PM: Registered nosave memory: [mem %#010llx-%#010llx]\n",
+ (unsigned long long) start_pfn << PAGE_SHIFT,
+ ((unsigned long long) end_pfn << PAGE_SHIFT) - 1);
+}
+
+/*
+ * Set bits in this map correspond to the page frames the contents of which
+ * should not be saved during the suspend.
+ */
+static struct memory_bitmap *forbidden_pages_map;
+
+/* Set bits in this map correspond to free page frames. */
+static struct memory_bitmap *free_pages_map;
+
+/*
+ * Each page frame allocated for creating the image is marked by setting the
+ * corresponding bits in forbidden_pages_map and free_pages_map simultaneously
+ */
+
+void swsusp_set_page_free(struct page *page)
+{
+ if (free_pages_map)
+ memory_bm_set_bit(free_pages_map, 0, page_to_pfn(page));
+}
+
+static int swsusp_page_is_free(struct page *page)
+{
+ return free_pages_map ?
+ memory_bm_test_bit(free_pages_map, 0, page_to_pfn(page)) : 0;
+}
+
+void swsusp_unset_page_free(struct page *page)
+{
+ if (free_pages_map)
+ memory_bm_clear_bit(free_pages_map, 0, page_to_pfn(page));
+}
+
+static void swsusp_set_page_forbidden(struct page *page)
+{
+ if (forbidden_pages_map)
+ memory_bm_set_bit(forbidden_pages_map, 0, page_to_pfn(page));
+}
+
+int swsusp_page_is_forbidden(struct page *page)
+{
+ return forbidden_pages_map ?
+ memory_bm_test_bit(forbidden_pages_map, 0, page_to_pfn(page)) : 0;
+}
+
+static void swsusp_unset_page_forbidden(struct page *page)
+{
+ if (forbidden_pages_map)
+ memory_bm_clear_bit(forbidden_pages_map, 0, page_to_pfn(page));
+}
+
+/**
+ * mark_nosave_pages - set bits corresponding to the page frames the
+ * contents of which should not be saved in a given bitmap.
+ */
+
+static void mark_nosave_pages(struct memory_bitmap *bm)
+{
+ struct nosave_region *region;
+
+ if (list_empty(&nosave_regions))
+ return;
+
+ list_for_each_entry(region, &nosave_regions, list) {
+ unsigned long pfn;
+
+ pr_debug("PM: Marking nosave pages: [mem %#010llx-%#010llx]\n",
+ (unsigned long long) region->start_pfn << PAGE_SHIFT,
+ ((unsigned long long) region->end_pfn << PAGE_SHIFT)
+ - 1);
+
+ for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
+ if (pfn_valid(pfn)) {
+ /*
+ * It is safe to ignore the result of
+ * mem_bm_set_bit_check() here, since we won't
+ * touch the PFNs for which the error is
+ * returned anyway.
+ */
+ mem_bm_set_bit_check(bm, 0, pfn);
+ }
+ }
+}
+
+/**
+ * create_basic_memory_bitmaps - create bitmaps needed for marking page
+ * frames that should not be saved and free page frames. The pointers
+ * forbidden_pages_map and free_pages_map are only modified if everything
+ * goes well, because we don't want the bits to be used before both bitmaps
+ * are set up.
+ */
+
+int create_basic_memory_bitmaps(void)
+{
+ struct memory_bitmap *bm1, *bm2;
+ int error = 0;
+
+ if (forbidden_pages_map && free_pages_map)
+ return 0;
+ else
+ BUG_ON(forbidden_pages_map || free_pages_map);
+
+ bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
+ if (!bm1)
+ return -ENOMEM;
+
+ error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY);
+ if (error)
+ goto Free_first_object;
+
+ bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
+ if (!bm2)
+ goto Free_first_bitmap;
+
+ error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY);
+ if (error)
+ goto Free_second_object;
+
+ forbidden_pages_map = bm1;
+ free_pages_map = bm2;
+ mark_nosave_pages(forbidden_pages_map);
+
+ pr_debug("PM: Basic memory bitmaps created\n");
+
+ return 0;
+
+ Free_second_object:
+ kfree(bm2);
+ Free_first_bitmap:
+ memory_bm_free(bm1, PG_UNSAFE_CLEAR);
+ Free_first_object:
+ kfree(bm1);
+ return -ENOMEM;
+}
+
+/**
+ * free_basic_memory_bitmaps - free memory bitmaps allocated by
+ * create_basic_memory_bitmaps(). The auxiliary pointers are necessary
+ * so that the bitmaps themselves are not referred to while they are being
+ * freed.
+ */
+
+void free_basic_memory_bitmaps(void)
+{
+ struct memory_bitmap *bm1, *bm2;
+
+ if (WARN_ON(!(forbidden_pages_map && free_pages_map)))
+ return;
+
+ bm1 = forbidden_pages_map;
+ bm2 = free_pages_map;
+ forbidden_pages_map = NULL;
+ free_pages_map = NULL;
+ memory_bm_free(bm1, PG_UNSAFE_CLEAR);
+ kfree(bm1);
+ memory_bm_free(bm2, PG_UNSAFE_CLEAR);
+ kfree(bm2);
+
+ pr_debug("PM: Basic memory bitmaps freed\n");
+}
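+
+/*
+ * For illustration (assuming 4 KiB pages): on a machine with 8 GiB of RAM,
+ * totalram_pages is roughly 2M page frames, so the default image_size
+ * computed above is about (2M * 2 / 5) * 4 KiB, i.e. roughly 3.2 GiB or
+ * two fifths of RAM.
+ */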
+
+/**
+ * snapshot_additional_pages - estimate the number of additional pages
+ * needed for setting up the suspend image data structures for a given
+ * zone (usually the returned value is greater than the exact number)
+ */
+
+unsigned int snapshot_additional_pages(struct zone *zone)
+{
+ unsigned int rtree, nodes;
+
+ rtree = nodes = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
+ rtree += DIV_ROUND_UP(rtree * sizeof(struct rtree_node),
+ LINKED_PAGE_DATA_SIZE);
+ while (nodes > 1) {
+ nodes = DIV_ROUND_UP(nodes, BM_ENTRIES_PER_LEVEL);
+ rtree += nodes;
+ }
+
+ return 2 * rtree;
+}
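+
+/*
+ * Worked example for the estimate above (assuming 4 KiB pages, 64-bit,
+ * and sizeof(struct rtree_node) == 24): a zone spanning 1M page frames
+ * (4 GiB) needs DIV_ROUND_UP(1048576, 32768) = 32 leaf blocks, about one
+ * extra page for the 32 rtree_node descriptors, and one inner node, so
+ * rtree comes to 34 pages and the function returns 2 * 34 = 68 pages to
+ * cover the two memory bitmaps that are sized from this estimate.
+ */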
+
+#ifdef CONFIG_HIGHMEM
+/**
+ * count_free_highmem_pages - compute the total number of free highmem
+ * pages, system-wide.
+ */
+
+static unsigned int count_free_highmem_pages(void)
+{
+ struct zone *zone;
+ unsigned int cnt = 0;
+
+ for_each_populated_zone(zone)
+ if (is_highmem(zone))
+ cnt += zone_page_state(zone, NR_FREE_PAGES);
+
+ return cnt;
+}
+
+/**
+ * saveable_highmem_page - Determine whether a highmem page should be
+ * included in the suspend image.
+ *
+ * We should save the page if it isn't Nosave or NosaveFree, or Reserved,
+ * and it isn't a part of a free chunk of pages.
+ */
+struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
+{
+ struct page *page;
+
+ if (!pfn_valid(pfn))
+ return NULL;
+
+ page = pfn_to_page(pfn);
+ if (page_zone(page) != zone)
+ return NULL;
+
+ BUG_ON(!PageHighMem(page));
+
+ if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page) ||
+ PageReserved(page))
+ return NULL;
+
+ if (page_is_guard(page))
+ return NULL;
+
+ return page;
+}
+
+/**
+ * count_highmem_pages - compute the total number of saveable highmem
+ * pages.
+ */
+
+static unsigned int count_highmem_pages(void)
+{
+ struct zone *zone;
+ unsigned int n = 0;
+
+ for_each_populated_zone(zone) {
+ unsigned long pfn, max_zone_pfn;
+
+ if (!is_highmem(zone))
+ continue;
+
+ mark_free_pages(zone);
+ max_zone_pfn = zone_end_pfn(zone);
+ for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
+ if (saveable_highmem_page(zone, pfn))
+ n++;
+ }
+ return n;
+}
+#endif /* CONFIG_HIGHMEM */
+
+/**
+ * saveable_page - Determine whether a non-highmem page should be included
+ * in the suspend image.
+ *
+ * We should save the page if it isn't Nosave, and is not in the range
+ * of pages statically defined as 'unsaveable', and it isn't a part of
+ * a free chunk of pages.
+ */
+struct page *saveable_page(struct zone *zone, unsigned long pfn)
+{
+ struct page *page;
+
+ if (!pfn_valid(pfn))
+ return NULL;
+
+ page = pfn_to_page(pfn);
+ if (page_zone(page) != zone)
+ return NULL;
+
+ BUG_ON(PageHighMem(page));
+
+ if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page))
+ return NULL;
+
+ if (PageReserved(page)
+ && (!kernel_page_present(page) || pfn_is_nosave(pfn)))
+ return NULL;
+
+ if (page_is_guard(page))
+ return NULL;
+
+ return page;
+}
+
+/**
+ * count_data_pages - compute the total number of saveable non-highmem
+ * pages.
+ */
+
+static unsigned int count_data_pages(void)
+{
+ struct zone *zone;
+ unsigned long pfn, max_zone_pfn;
+ unsigned int n = 0;
+
+ for_each_populated_zone(zone) {
+ if (is_highmem(zone))
+ continue;
+
+ mark_free_pages(zone);
+ max_zone_pfn = zone_end_pfn(zone);
+ for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
+ if (saveable_page(zone, pfn))
+ n++;
+ }
+ return n;
+}
+
+/* This is needed, because copy_page and memcpy are not usable for copying
+ * task structs.
+ */
+static inline void do_copy_page(long *dst, long *src)
+{
+ int n;
+
+ for (n = PAGE_SIZE / sizeof(long); n; n--)
+ *dst++ = *src++;
+}
+
+
+/**
+ * safe_copy_page - check if the page we are going to copy is marked as
+ * present in the kernel page tables (this always is the case if
+ * CONFIG_DEBUG_PAGEALLOC is not set and in that case
+ * kernel_page_present() always returns 'true').
+ */
+static void safe_copy_page(void *dst, struct page *s_page)
+{
+ if (kernel_page_present(s_page)) {
+ do_copy_page(dst, page_address(s_page));
+ } else {
+ kernel_map_pages(s_page, 1, 1);
+ do_copy_page(dst, page_address(s_page));
+ kernel_map_pages(s_page, 1, 0);
+ }
+}
+
+
+#ifdef CONFIG_HIGHMEM
+static inline struct page *
+page_is_saveable(struct zone *zone, unsigned long pfn)
+{
+ return is_highmem(zone) ?
+ saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
+}
+
+static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
+{
+ struct page *s_page, *d_page;
+ void *src, *dst;
+
+ s_page = pfn_to_page(src_pfn);
+ d_page = pfn_to_page(dst_pfn);
+ if (PageHighMem(s_page)) {
+ src = kmap_atomic(s_page);
+ dst = kmap_atomic(d_page);
+ do_copy_page(dst, src);
+ kunmap_atomic(dst);
+ kunmap_atomic(src);
+ } else {
+ if (PageHighMem(d_page)) {
+ /* Page pointed to by src may contain some kernel
+ * data modified by kmap_atomic()
+ */
+ safe_copy_page(buffer, s_page);
+ dst = kmap_atomic(d_page);
+ copy_page(dst, buffer);
+ kunmap_atomic(dst);
+ } else {
+ safe_copy_page(page_address(d_page), s_page);
+ }
+ }
+}
+#else
+#define page_is_saveable(zone, pfn) saveable_page(zone, pfn)
+
+static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
+{
+ safe_copy_page(page_address(pfn_to_page(dst_pfn)),
+ pfn_to_page(src_pfn));
+}
+#endif /* CONFIG_HIGHMEM */
+
+static void
+copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
+{
+ struct zone *zone;
+ unsigned long pfn;
+
+ for_each_populated_zone(zone) {
+ unsigned long max_zone_pfn;
+
+ mark_free_pages(zone);
+ max_zone_pfn = zone_end_pfn(zone);
+ for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
+ if (page_is_saveable(zone, pfn))
+ memory_bm_set_bit(orig_bm, 0, pfn);
+ }
+ memory_bm_position_reset(orig_bm);
+ memory_bm_position_reset(copy_bm);
+ for(;;) {
+ pfn = memory_bm_next_pfn(orig_bm, 0);
+ if (unlikely(pfn == BM_END_OF_MAP))
+ break;
+ copy_data_page(memory_bm_next_pfn(copy_bm, 0), pfn);
+ }
+}
+
+/* Total number of image pages */
+static unsigned int nr_copy_pages;
+/* Number of pages needed for saving the original pfns of the image pages */
+static unsigned int nr_meta_pages;
+/*
+ * Numbers of normal and highmem page frames allocated for hibernation image
+ * before suspending devices.
+ */
+unsigned int alloc_normal, alloc_highmem;
+/*
+ * Memory bitmap used for marking saveable pages (during hibernation) or
+ * hibernation image pages (during restore)
+ */
+static struct memory_bitmap orig_bm;
+/*
+ * Memory bitmap used during hibernation for marking allocated page frames that
+ * will contain copies of saveable pages. During restore it is initially used
+ * for marking hibernation image pages, but then the set bits from it are
+ * duplicated in @orig_bm and it is released. On highmem systems it is next
+ * used for marking "safe" highmem pages, but it has to be reinitialized for
+ * this purpose.
+ */
+static struct memory_bitmap copy_bm;
+
+/**
+ * swsusp_free - free pages allocated for the suspend.
+ *
+ * Suspend pages are allocated before the atomic copy is made, so we
+ * need to release them after the resume.
+ */
+
+void swsusp_free(void)
+{
+ unsigned long fb_pfn, fr_pfn;
+
+ if (!forbidden_pages_map || !free_pages_map)
+ goto out;
+
+ memory_bm_position_reset(forbidden_pages_map);
+ memory_bm_position_reset(free_pages_map);
+
+loop:
+ fr_pfn = memory_bm_next_pfn(free_pages_map, 0);
+ fb_pfn = memory_bm_next_pfn(forbidden_pages_map, 0);
+
+ /*
+ * Find the next bit set in both bitmaps. This is guaranteed to
+ * terminate when fb_pfn == fr_pfn == BM_END_OF_MAP.
+ */
+ do {
+ if (fb_pfn < fr_pfn)
+ fb_pfn = memory_bm_next_pfn(forbidden_pages_map, 0);
+ if (fr_pfn < fb_pfn)
+ fr_pfn = memory_bm_next_pfn(free_pages_map, 0);
+ } while (fb_pfn != fr_pfn);
+
+ if (fr_pfn != BM_END_OF_MAP && pfn_valid(fr_pfn)) {
+ struct page *page = pfn_to_page(fr_pfn);
+
+ memory_bm_clear_current(forbidden_pages_map, 0);
+ memory_bm_clear_current(free_pages_map, 0);
+ __free_page(page);
+ goto loop;
+ }
+
+out:
+ nr_copy_pages = 0;
+ nr_meta_pages = 0;
+ restore_pblist = NULL;
+ buffer = NULL;
+ alloc_normal = 0;
+ alloc_highmem = 0;
+}
+
+/* Helper functions used for the shrinking of memory. */
+
+#define GFP_IMAGE (GFP_KERNEL | __GFP_NOWARN)
+
+/**
+ * preallocate_image_pages - Allocate a number of pages for hibernation image
+ * @nr_pages: Number of page frames to allocate.
+ * @mask: GFP flags to use for the allocation.
+ *
+ * Return value: Number of page frames actually allocated
+ */
+static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask)
+{
+ unsigned long nr_alloc = 0;
+
+ while (nr_pages > 0) {
+ struct page *page;
+
+ page = alloc_image_page(mask);
+ if (!page)
+ break;
+ memory_bm_set_bit(&copy_bm, 0, page_to_pfn(page));
+ if (PageHighMem(page))
+ alloc_highmem++;
+ else
+ alloc_normal++;
+ nr_pages--;
+ nr_alloc++;
+ }
+
+ return nr_alloc;
+}
+
+static unsigned long preallocate_image_memory(unsigned long nr_pages,
+ unsigned long avail_normal)
+{
+ unsigned long alloc;
+
+ if (avail_normal <= alloc_normal)
+ return 0;
+
+ alloc = avail_normal - alloc_normal;
+ if (nr_pages < alloc)
+ alloc = nr_pages;
+
+ return preallocate_image_pages(alloc, GFP_IMAGE);
+}
+
+#ifdef CONFIG_HIGHMEM
+static unsigned long preallocate_image_highmem(unsigned long nr_pages)
+{
+ return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM);
+}
+
+/**
+ * __fraction - Compute (an approximation of) x * (multiplier / base)
+ */
+static unsigned long __fraction(u64 x, u64 multiplier, u64 base)
+{
+ x *= multiplier;
+ do_div(x, base);
+ return (unsigned long)x;
+}
+
+static unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
+ unsigned long highmem,
+ unsigned long total)
+{
+ unsigned long alloc = __fraction(nr_pages, highmem, total);
+
+ return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM);
+}
+#else /* CONFIG_HIGHMEM */
+static inline unsigned long preallocate_image_highmem(unsigned long nr_pages)
+{
+ return 0;
+}
+
+static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
+ unsigned long highmem,
+ unsigned long total)
+{
+ return 0;
+}
+#endif /* CONFIG_HIGHMEM */
+
+/**
+ * free_unnecessary_pages - Release preallocated pages not needed for the image
+ */
+static unsigned long free_unnecessary_pages(void)
+{
+ unsigned long save, to_free_normal, to_free_highmem, free;
+
+ save = count_data_pages();
+ if (alloc_normal >= save) {
+ to_free_normal = alloc_normal - save;
+ save = 0;
+ } else {
+ to_free_normal = 0;
+ save -= alloc_normal;
+ }
+ save += count_highmem_pages();
+ if (alloc_highmem >= save) {
+ to_free_highmem = alloc_highmem - save;
+ } else {
+ to_free_highmem = 0;
+ save -= alloc_highmem;
+ if (to_free_normal > save)
+ to_free_normal -= save;
+ else
+ to_free_normal = 0;
+ }
+ free = to_free_normal + to_free_highmem;
+
+ memory_bm_position_reset(&copy_bm);
+
+ while (to_free_normal > 0 || to_free_highmem > 0) {
+ unsigned long pfn = memory_bm_next_pfn(&copy_bm, 0);
+ struct page *page = pfn_to_page(pfn);
+
+ if (PageHighMem(page)) {
+ if (!to_free_highmem)
+ continue;
+ to_free_highmem--;
+ alloc_highmem--;
+ } else {
+ if (!to_free_normal)
+ continue;
+ to_free_normal--;
+ alloc_normal--;
+ }
+ memory_bm_clear_bit(&copy_bm, 0, pfn);
+ swsusp_unset_page_forbidden(page);
+ swsusp_unset_page_free(page);
+ __free_page(page);
+ }
+
+ return free;
+}
+
+/**
+ * minimum_image_size - Estimate the minimum acceptable size of an image
+ * @saveable: Number of saveable pages in the system.
+ *
+ * We want to avoid attempting to free too much memory too hard, so estimate the
+ * minimum acceptable size of a hibernation image to use as the lower limit for
+ * preallocating memory.
+ *
+ * We assume that the minimum image size should be proportional to
+ *
+ * [number of saveable pages] - [number of pages that can be freed in theory]
+ *
+ * where the second term is the sum of (1) reclaimable slab pages, (2) active
+ * and (3) inactive anonymous pages, (4) active and (5) inactive file pages,
+ * minus mapped file pages.
+ */
+static unsigned long minimum_image_size(unsigned long saveable)
+{
+ unsigned long size;
+
+ size = global_page_state(NR_SLAB_RECLAIMABLE)
+ + global_page_state(NR_ACTIVE_ANON)
+ + global_page_state(NR_INACTIVE_ANON)
+ + global_page_state(NR_ACTIVE_FILE)
+ + global_page_state(NR_INACTIVE_FILE)
+ - global_page_state(NR_FILE_MAPPED);
+
+ return saveable <= size ? 0 : saveable - size;
+}
+
+/**
+ * hibernate_preallocate_memory - Preallocate memory for hibernation image
+ *
+ * To create a hibernation image it is necessary to make a copy of every page
+ * frame in use. We also need a number of page frames to be free during
+ * hibernation for allocations made while saving the image and for device
+ * drivers, in case they need to allocate memory from their hibernation
+ * callbacks (these two numbers are given by PAGES_FOR_IO, which is a rough
+ * estimate, and reserved_size divided by PAGE_SIZE, which is tunable through
+ * /sys/power/reserved_size, respectively). To make this happen, we compute the
+ * total number of available page frames and allocate at least
+ *
+ * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2
+ * + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE)
+ *
+ * of them, which corresponds to the maximum size of a hibernation image.
+ *
+ * If image_size is set below the number following from the above formula,
+ * the preallocation of memory is continued until the total number of saveable
+ * pages in the system is below the requested image size or the minimum
+ * acceptable image size returned by minimum_image_size(), whichever is greater.
+ */
+int hibernate_preallocate_memory(void)
+{
+ struct zone *zone;
+ unsigned long saveable, size, max_size, count, highmem, pages = 0;
+ unsigned long alloc, save_highmem, pages_highmem, avail_normal;
+ ktime_t start, stop;
+ int error;
+
+ printk(KERN_INFO "PM: Preallocating image memory... ");
+ start = ktime_get();
+
+ error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY);
+ if (error)
+ goto err_out;
+
+ error = memory_bm_create(&copy_bm, GFP_IMAGE, PG_ANY);
+ if (error)
+ goto err_out;
+
+ alloc_normal = 0;
+ alloc_highmem = 0;
+
+ /* Count the number of saveable data pages. */
+ save_highmem = count_highmem_pages();
+ saveable = count_data_pages();
+
+ /*
+ * Compute the total number of page frames we can use (count) and the
+ * number of pages needed for image metadata (size).
+ */
+ count = saveable;
+ saveable += save_highmem;
+ highmem = save_highmem;
+ size = 0;
+ for_each_populated_zone(zone) {
+ size += snapshot_additional_pages(zone);
+ if (is_highmem(zone))
+ highmem += zone_page_state(zone, NR_FREE_PAGES);
+ else
+ count += zone_page_state(zone, NR_FREE_PAGES);
+ }
+ avail_normal = count;
+ count += highmem;
+ count -= totalreserve_pages;
+
+ /* Add number of pages required for page keys (s390 only). */
+ size += page_key_additional_pages(saveable);
+
+ /* Compute the maximum number of saveable pages to leave in memory. */
+ max_size = (count - (size + PAGES_FOR_IO)) / 2
+ - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE);
+ /* Compute the desired number of image pages specified by image_size. */
+ size = DIV_ROUND_UP(image_size, PAGE_SIZE);
+ if (size > max_size)
+ size = max_size;
+ /*
+ * If the desired number of image pages is at least as large as the
+ * current number of saveable pages in memory, allocate page frames for
+ * the image and we're done.
+ */
+ if (size >= saveable) {
+ pages = preallocate_image_highmem(save_highmem);
+ pages += preallocate_image_memory(saveable - pages, avail_normal);
+ goto out;
+ }
+
+ /* Estimate the minimum size of the image. */
+ pages = minimum_image_size(saveable);
+ /*
+ * To avoid excessive pressure on the normal zone, leave room in it to
+ * accommodate an image of the minimum size (unless it's already too
+ * small, in which case don't preallocate pages from it at all).
+ */
+ if (avail_normal > pages)
+ avail_normal -= pages;
+ else
+ avail_normal = 0;
+ if (size < pages)
+ size = min_t(unsigned long, pages, max_size);
+
+ /*
+ * Let the memory management subsystem know that we're going to need a
+ * large number of page frames to allocate and make it free some memory.
+ * NOTE: If this is not done, performance will be hurt badly in some
+ * test cases.
+ */
+ shrink_all_memory(saveable - size);
+
+ /*
+ * The number of saveable pages in memory was too high, so apply some
+ * pressure to decrease it. First, make room for the largest possible
+ * image and fail if that doesn't work. Next, try to decrease the size
+ * of the image as much as indicated by 'size' using allocations from
+ * highmem and non-highmem zones separately.
+ */
+ pages_highmem = preallocate_image_highmem(highmem / 2);
+ alloc = count - max_size;
+ if (alloc > pages_highmem)
+ alloc -= pages_highmem;
+ else
+ alloc = 0;
+ pages = preallocate_image_memory(alloc, avail_normal);
+ if (pages < alloc) {
+ /* We have exhausted non-highmem pages, try highmem. */
+ alloc -= pages;
+ pages += pages_highmem;
+ pages_highmem = preallocate_image_highmem(alloc);
+ if (pages_highmem < alloc)
+ goto err_out;
+ pages += pages_highmem;
+ /*
+ * size is the desired number of saveable pages to leave in
+ * memory, so try to preallocate (all memory - size) pages.
+ */
+ alloc = (count - pages) - size;
+ pages += preallocate_image_highmem(alloc);
+ } else {
+ /*
+ * There are approximately max_size saveable pages at this point
+ * and we want to reduce this number down to size.
+ */
+ alloc = max_size - size;
+ size = preallocate_highmem_fraction(alloc, highmem, count);
+ pages_highmem += size;
+ alloc -= size;
+ size = preallocate_image_memory(alloc, avail_normal);
+ pages_highmem += preallocate_image_highmem(alloc - size);
+ pages += pages_highmem + size;
+ }
+
+ /*
+ * We only need as many page frames for the image as there are saveable
+	 * pages in memory, but we have allocated more. Release the excess
+ * ones now.
+ */
+ pages -= free_unnecessary_pages();
+
+ out:
+ stop = ktime_get();
+ printk(KERN_CONT "done (allocated %lu pages)\n", pages);
+ swsusp_show_speed(start, stop, pages, "Allocated");
+
+ return 0;
+
+ err_out:
+ printk(KERN_CONT "\n");
+ swsusp_free();
+ return -ENOMEM;
+}
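+
+/*
+ * Worked example for the max_size formula above (illustrative numbers
+ * only): with count = 500000 usable page frames, size = 1000 metadata
+ * pages, PAGES_FOR_IO = 1024 and reserved_size equivalent to 256 pages,
+ *
+ *	max_size = (500000 - (1000 + 1024)) / 2 - 2 * 256 = 248476 pages,
+ *
+ * and the requested image_size (converted to pages) is clamped to that
+ * value before the preallocation decisions above are made.
+ */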
+
+#ifdef CONFIG_HIGHMEM
+/**
+ * count_pages_for_highmem - compute the number of non-highmem pages
+ * that will be necessary for creating copies of highmem pages.
+ */
+
+static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
+{
+ unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem;
+
+ if (free_highmem >= nr_highmem)
+ nr_highmem = 0;
+ else
+ nr_highmem -= free_highmem;
+
+ return nr_highmem;
+}
+#else
+static unsigned int
+count_pages_for_highmem(unsigned int nr_highmem) { return 0; }
+#endif /* CONFIG_HIGHMEM */
+
+/**
+ * enough_free_mem - Make sure we have enough free memory for the
+ * snapshot image.
+ */
+
+static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
+{
+ struct zone *zone;
+ unsigned int free = alloc_normal;
+
+ for_each_populated_zone(zone)
+ if (!is_highmem(zone))
+ free += zone_page_state(zone, NR_FREE_PAGES);
+
+ nr_pages += count_pages_for_highmem(nr_highmem);
+ pr_debug("PM: Normal pages needed: %u + %u, available pages: %u\n",
+ nr_pages, PAGES_FOR_IO, free);
+
+ return free > nr_pages + PAGES_FOR_IO;
+}
+
+#ifdef CONFIG_HIGHMEM
+/**
+ * get_highmem_buffer - if there are some highmem pages in the suspend
+ * image, we may need the buffer to copy them and/or load their data.
+ */
+
+static inline int get_highmem_buffer(int safe_needed)
+{
+ buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed);
+ return buffer ? 0 : -ENOMEM;
+}
+
+/**
+ * alloc_highmem_pages - allocate some highmem pages for the image.
+ * Try to allocate as many pages as needed, but if the number of free
+ * highmem pages is less than that, allocate them all.
+ */
+
+static inline unsigned int
+alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem)
+{
+ unsigned int to_alloc = count_free_highmem_pages();
+
+ if (to_alloc > nr_highmem)
+ to_alloc = nr_highmem;
+
+ nr_highmem -= to_alloc;
+ while (to_alloc-- > 0) {
+ struct page *page;
+
+ page = alloc_image_page(__GFP_HIGHMEM);
+ memory_bm_set_bit(bm, 0, page_to_pfn(page));
+ }
+ return nr_highmem;
+}
+#else
+static inline int get_highmem_buffer(int safe_needed) { return 0; }
+
+static inline unsigned int
+alloc_highmem_pages(struct memory_bitmap *bm, unsigned int n) { return 0; }
+#endif /* CONFIG_HIGHMEM */
+
+/**
+ * swsusp_alloc - allocate memory for the suspend image
+ *
+ * We first try to allocate as many highmem pages as there are
+ * saveable highmem pages in the system. If that fails, we allocate
+ * non-highmem pages for the copies of the remaining highmem ones.
+ *
+ * In this approach it is likely that the copies of highmem pages will
+ * also be located in the high memory, because of the way in which
+ * copy_data_pages() works.
+ */
+
+static int
+swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
+ unsigned int nr_pages, unsigned int nr_highmem)
+{
+ if (nr_highmem > 0) {
+ if (get_highmem_buffer(PG_ANY))
+ goto err_out;
+ if (nr_highmem > alloc_highmem) {
+ nr_highmem -= alloc_highmem;
+ nr_pages += alloc_highmem_pages(copy_bm, nr_highmem);
+ }
+ }
+ if (nr_pages > alloc_normal) {
+ nr_pages -= alloc_normal;
+ while (nr_pages-- > 0) {
+ struct page *page;
+
+ page = alloc_image_page(GFP_ATOMIC | __GFP_COLD);
+ if (!page)
+ goto err_out;
+ memory_bm_set_bit(copy_bm, 0, page_to_pfn(page));
+ }
+ }
+
+ return 0;
+
+ err_out:
+ swsusp_free();
+ return -ENOMEM;
+}
+
+asmlinkage __visible int swsusp_save(void)
+{
+ unsigned int nr_pages, nr_highmem;
+
+ if (toi_running)
+ return toi_post_context_save();
+
+ printk(KERN_INFO "PM: Creating hibernation image:\n");
+
+ drain_local_pages(NULL);
+ nr_pages = count_data_pages();
+ nr_highmem = count_highmem_pages();
+ printk(KERN_INFO "PM: Need to copy %u pages\n", nr_pages + nr_highmem);
+
+ if (!enough_free_mem(nr_pages, nr_highmem)) {
+ printk(KERN_ERR "PM: Not enough free memory\n");
+ return -ENOMEM;
+ }
+
+ if (swsusp_alloc(&orig_bm, &copy_bm, nr_pages, nr_highmem)) {
+ printk(KERN_ERR "PM: Memory allocation failed\n");
+ return -ENOMEM;
+ }
+
+	/* During allocation of the suspend pagedir, new cold pages may appear.
+ * Kill them.
+ */
+ drain_local_pages(NULL);
+ copy_data_pages(&copy_bm, &orig_bm);
+
+ /*
+ * End of critical section. From now on, we can write to memory,
+	 * but we should not touch disk. This especially means we must _not_
+ * touch swap space! Except we must write out our image of course.
+ */
+
+ nr_pages += nr_highmem;
+ nr_copy_pages = nr_pages;
+ nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
+
+ printk(KERN_INFO "PM: Hibernation image created (%d pages copied)\n",
+ nr_pages);
+
+ return 0;
+}
+
+#ifndef CONFIG_ARCH_HIBERNATION_HEADER
+static int init_header_complete(struct swsusp_info *info)
+{
+ memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname));
+ info->version_code = LINUX_VERSION_CODE;
+ return 0;
+}
+
+char *check_image_kernel(struct swsusp_info *info)
+{
+ if (info->version_code != LINUX_VERSION_CODE)
+ return "kernel version";
+ if (strcmp(info->uts.sysname,init_utsname()->sysname))
+ return "system type";
+ if (strcmp(info->uts.release,init_utsname()->release))
+ return "kernel release";
+ if (strcmp(info->uts.version,init_utsname()->version))
+ return "version";
+ if (strcmp(info->uts.machine,init_utsname()->machine))
+ return "machine";
+ return NULL;
+}
+#endif /* CONFIG_ARCH_HIBERNATION_HEADER */
+
+unsigned long snapshot_get_image_size(void)
+{
+ return nr_copy_pages + nr_meta_pages + 1;
+}
+
+int init_header(struct swsusp_info *info)
+{
+ memset(info, 0, sizeof(struct swsusp_info));
+ info->num_physpages = get_num_physpages();
+ info->image_pages = nr_copy_pages;
+ info->pages = snapshot_get_image_size();
+ info->size = info->pages;
+ info->size <<= PAGE_SHIFT;
+ return init_header_complete(info);
+}
+
+/**
+ * pack_pfns - pfns corresponding to the set bits found in the bitmap @bm
+ * are stored in the array @buf[] (1 page at a time)
+ */
+
+static inline void
+pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
+{
+ int j;
+
+ for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
+ buf[j] = memory_bm_next_pfn(bm, 0);
+ if (unlikely(buf[j] == BM_END_OF_MAP))
+ break;
+ /* Save page key for data page (s390 only). */
+ page_key_read(buf + j);
+ }
+}
+
+/**
+ * snapshot_read_next - used for reading the system memory snapshot.
+ *
+ * On the first call, @handle should point to a zeroed
+ * snapshot_handle structure. The structure gets updated and a pointer
+ * to it should be passed to this function on each subsequent call.
+ *
+ * On success the function returns a positive number. Then, the caller
+ * is allowed to read up to the returned number of bytes from the memory
+ * location computed by the data_of() macro.
+ *
+ * The function returns 0 to indicate the end of data stream condition,
+ * and a negative number is returned on error. In such cases the
+ * structure pointed to by @handle is not updated and should not be used
+ * any more.
+ */
+
+int snapshot_read_next(struct snapshot_handle *handle)
+{
+ if (handle->cur > nr_meta_pages + nr_copy_pages)
+ return 0;
+
+ if (!buffer) {
+ /* This makes the buffer be freed by swsusp_free() */
+ buffer = get_image_page(GFP_ATOMIC, PG_ANY);
+ if (!buffer)
+ return -ENOMEM;
+ }
+ if (!handle->cur) {
+ int error;
+
+ error = init_header((struct swsusp_info *)buffer);
+ if (error)
+ return error;
+ handle->buffer = buffer;
+ memory_bm_position_reset(&orig_bm);
+ memory_bm_position_reset(&copy_bm);
+ } else if (handle->cur <= nr_meta_pages) {
+ clear_page(buffer);
+ pack_pfns(buffer, &orig_bm);
+ } else {
+ struct page *page;
+
+ page = pfn_to_page(memory_bm_next_pfn(&copy_bm, 0));
+ if (PageHighMem(page)) {
+ /* Highmem pages are copied to the buffer,
+ * because we can't return with a kmapped
+ * highmem page (we may not be called again).
+ */
+ void *kaddr;
+
+ kaddr = kmap_atomic(page);
+ copy_page(buffer, kaddr);
+ kunmap_atomic(kaddr);
+ handle->buffer = buffer;
+ } else {
+ handle->buffer = page_address(page);
+ }
+ }
+ handle->cur++;
+ return PAGE_SIZE;
+}
+
+/**
+ * mark_unsafe_pages - mark the pages that cannot be used for storing
+ * the image during resume, because they conflict with the pages that
+ * had been used before suspend
+ */
+
+static int mark_unsafe_pages(struct memory_bitmap *bm)
+{
+ struct zone *zone;
+ unsigned long pfn, max_zone_pfn;
+
+ /* Clear page flags */
+ for_each_populated_zone(zone) {
+ max_zone_pfn = zone_end_pfn(zone);
+ for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
+ if (pfn_valid(pfn))
+ swsusp_unset_page_free(pfn_to_page(pfn));
+ }
+
+ /* Mark pages that correspond to the "original" pfns as "unsafe" */
+ memory_bm_position_reset(bm);
+ do {
+ pfn = memory_bm_next_pfn(bm, 0);
+ if (likely(pfn != BM_END_OF_MAP)) {
+ if (likely(pfn_valid(pfn)))
+ swsusp_set_page_free(pfn_to_page(pfn));
+ else
+ return -EFAULT;
+ }
+ } while (pfn != BM_END_OF_MAP);
+
+ allocated_unsafe_pages = 0;
+
+ return 0;
+}
+
+static void
+duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src)
+{
+ unsigned long pfn;
+
+ memory_bm_position_reset(src);
+ pfn = memory_bm_next_pfn(src, 0);
+ while (pfn != BM_END_OF_MAP) {
+ memory_bm_set_bit(dst, 0, pfn);
+ pfn = memory_bm_next_pfn(src, 0);
+ }
+}
+
+static int check_header(struct swsusp_info *info)
+{
+ char *reason;
+
+ reason = check_image_kernel(info);
+ if (!reason && info->num_physpages != get_num_physpages())
+ reason = "memory size";
+ if (reason) {
+ printk(KERN_ERR "PM: Image mismatch: %s\n", reason);
+ return -EPERM;
+ }
+ return 0;
+}
+
+/**
+ * load_header - check the image header and copy data from it
+ */
+
+static int
+load_header(struct swsusp_info *info)
+{
+ int error;
+
+ restore_pblist = NULL;
+ error = check_header(info);
+ if (!error) {
+ nr_copy_pages = info->image_pages;
+ nr_meta_pages = info->pages - info->image_pages - 1;
+ }
+ return error;
+}
+
+/**
+ * unpack_orig_pfns - for each element of @buf[] (1 page at a time) set
+ * the corresponding bit in the memory bitmap @bm
+ */
+static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
+{
+ int j;
+
+ for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
+ if (unlikely(buf[j] == BM_END_OF_MAP))
+ break;
+
+ /* Extract and buffer page key for data page (s390 only). */
+ page_key_memorize(buf + j);
+
+ if (memory_bm_pfn_present(bm, 0, buf[j]))
+ memory_bm_set_bit(bm, 0, buf[j]);
+ else
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/* List of "safe" pages that may be used to store data loaded from the suspend
+ * image
+ */
+static struct linked_page *safe_pages_list;
+
+#ifdef CONFIG_HIGHMEM
+/* struct highmem_pbe is used for creating the list of highmem pages that
+ * should be restored atomically during the resume from disk, because the page
+ * frames they have occupied before the suspend are in use.
+ */
+struct highmem_pbe {
+ struct page *copy_page; /* data is here now */
+ struct page *orig_page; /* data was here before the suspend */
+ struct highmem_pbe *next;
+};
+
+/* List of highmem PBEs needed for restoring the highmem pages that were
+ * allocated before the suspend and included in the suspend image, but have
+ * also been allocated by the "resume" kernel, so their contents cannot be
+ * written directly to their "original" page frames.
+ */
+static struct highmem_pbe *highmem_pblist;
+
+/**
+ * count_highmem_image_pages - compute the number of highmem pages in the
+ * suspend image. The bits in the memory bitmap @bm that correspond to the
+ * image pages are assumed to be set.
+ */
+
+static unsigned int count_highmem_image_pages(struct memory_bitmap *bm)
+{
+ unsigned long pfn;
+ unsigned int cnt = 0;
+
+ memory_bm_position_reset(bm);
+ pfn = memory_bm_next_pfn(bm, 0);
+ while (pfn != BM_END_OF_MAP) {
+ if (PageHighMem(pfn_to_page(pfn)))
+ cnt++;
+
+ pfn = memory_bm_next_pfn(bm, 0);
+ }
+ return cnt;
+}
+
+/**
+ * prepare_highmem_image - try to allocate as many highmem pages as
+ * there are highmem image pages (@nr_highmem_p points to the variable
+ * containing the number of highmem image pages). The pages that are
+ * "safe" (ie. will not be overwritten when the suspend image is
+ * restored) have the corresponding bits set in @bm (it must be
+ * uninitialized).
+ *
+ * NOTE: This function should not be called if there are no highmem
+ * image pages.
+ */
+
+static unsigned int safe_highmem_pages;
+
+static struct memory_bitmap *safe_highmem_bm;
+
+static int
+prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
+{
+ unsigned int to_alloc;
+
+ if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE))
+ return -ENOMEM;
+
+ if (get_highmem_buffer(PG_SAFE))
+ return -ENOMEM;
+
+ to_alloc = count_free_highmem_pages();
+ if (to_alloc > *nr_highmem_p)
+ to_alloc = *nr_highmem_p;
+ else
+ *nr_highmem_p = to_alloc;
+
+ safe_highmem_pages = 0;
+ while (to_alloc-- > 0) {
+ struct page *page;
+
+ page = alloc_page(__GFP_HIGHMEM);
+ if (!swsusp_page_is_free(page)) {
+ /* The page is "safe", set its bit the bitmap */
+ memory_bm_set_bit(bm, 0, page_to_pfn(page));
+ safe_highmem_pages++;
+ }
+ /* Mark the page as allocated */
+ swsusp_set_page_forbidden(page);
+ swsusp_set_page_free(page);
+ }
+ memory_bm_position_reset(bm);
+ safe_highmem_bm = bm;
+ return 0;
+}
+
+/**
+ * get_highmem_page_buffer - for given highmem image page find the buffer
+ * that suspend_write_next() should set for its caller to write to.
+ *
+ * If the page is to be saved to its "original" page frame or a copy of
+ * the page is to be made in the highmem, @buffer is returned. Otherwise,
+ * the copy of the page is to be made in normal memory, so the address of
+ * the copy is returned.
+ *
+ * If @buffer is returned, the caller of suspend_write_next() will write
+ * the page's contents to @buffer, so they will have to be copied to the
+ * right location on the next call to suspend_write_next() and it is done
+ * with the help of copy_last_highmem_page(). For this purpose, if
+ * @buffer is returned, @last_highmem_page is set to the page to which
+ * the data will have to be copied from @buffer.
+ */
+
+static struct page *last_highmem_page;
+
+static void *
+get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
+{
+ struct highmem_pbe *pbe;
+ void *kaddr;
+
+ if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) {
+ /* We have allocated the "original" page frame and we can
+ * use it directly to store the loaded page.
+ */
+ last_highmem_page = page;
+ return buffer;
+ }
+ /* The "original" page frame has not been allocated and we have to
+ * use a "safe" page frame to store the loaded page.
+ */
+ pbe = chain_alloc(ca, sizeof(struct highmem_pbe));
+ if (!pbe) {
+ swsusp_free();
+ return ERR_PTR(-ENOMEM);
+ }
+ pbe->orig_page = page;
+ if (safe_highmem_pages > 0) {
+ struct page *tmp;
+
+ /* Copy of the page will be stored in high memory */
+ kaddr = buffer;
+ tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm, 0));
+ safe_highmem_pages--;
+ last_highmem_page = tmp;
+ pbe->copy_page = tmp;
+ } else {
+ /* Copy of the page will be stored in normal memory */
+ kaddr = safe_pages_list;
+ safe_pages_list = safe_pages_list->next;
+ pbe->copy_page = virt_to_page(kaddr);
+ }
+ pbe->next = highmem_pblist;
+ highmem_pblist = pbe;
+ return kaddr;
+}
+
+/**
+ * copy_last_highmem_page - copy the contents of a highmem image page
+ * from @buffer, where the caller of snapshot_write_next() has placed
+ * them, to the right location represented by @last_highmem_page.
+ */
+
+static void copy_last_highmem_page(void)
+{
+ if (last_highmem_page) {
+ void *dst;
+
+ dst = kmap_atomic(last_highmem_page);
+ copy_page(dst, buffer);
+ kunmap_atomic(dst);
+ last_highmem_page = NULL;
+ }
+}
+
+static inline int last_highmem_page_copied(void)
+{
+ return !last_highmem_page;
+}
+
+static inline void free_highmem_data(void)
+{
+ if (safe_highmem_bm)
+ memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR);
+
+ if (buffer)
+ free_image_page(buffer, PG_UNSAFE_CLEAR);
+}
+#else
+static unsigned int
+count_highmem_image_pages(struct memory_bitmap *bm) { return 0; }
+
+static inline int
+prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
+{
+ return 0;
+}
+
+static inline void *
+get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
+{
+ return ERR_PTR(-EINVAL);
+}
+
+static inline void copy_last_highmem_page(void) {}
+static inline int last_highmem_page_copied(void) { return 1; }
+static inline void free_highmem_data(void) {}
+#endif /* CONFIG_HIGHMEM */
+
+/**
+ * prepare_image - use the memory bitmap @bm to mark the pages that will
+ * be overwritten in the process of restoring the system memory state
+ * from the suspend image ("unsafe" pages) and allocate memory for the
+ * image.
+ *
+ * The idea is to allocate a new memory bitmap first and then allocate
+ * as many pages as needed for the image data, but not to assign these
+ * pages to specific tasks initially. Instead, we just mark them as
+ * allocated and create a list of "safe" pages that will be used
+ * later. On systems with high memory a list of "safe" highmem pages is
+ * also created.
+ */
+
+#define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe))
+
+static int
+prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
+{
+ unsigned int nr_pages, nr_highmem;
+ struct linked_page *sp_list, *lp;
+ int error;
+
+ /* If there is no highmem, the buffer will not be necessary */
+ free_image_page(buffer, PG_UNSAFE_CLEAR);
+ buffer = NULL;
+
+ nr_highmem = count_highmem_image_pages(bm);
+ error = mark_unsafe_pages(bm);
+ if (error)
+ goto Free;
+
+ error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE);
+ if (error)
+ goto Free;
+
+ duplicate_memory_bitmap(new_bm, bm);
+ memory_bm_free(bm, PG_UNSAFE_KEEP);
+ if (nr_highmem > 0) {
+ error = prepare_highmem_image(bm, &nr_highmem);
+ if (error)
+ goto Free;
+ }
+ /* Reserve some safe pages for potential later use.
+ *
+ * NOTE: This way we make sure there will be enough safe pages for the
+ * chain_alloc() in get_buffer(). It is a bit wasteful, but
+ * nr_copy_pages cannot be greater than 50% of the memory anyway.
+ */
+ sp_list = NULL;
+ /* nr_copy_pages cannot be less than allocated_unsafe_pages */
+ nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
+ nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
+ while (nr_pages > 0) {
+ lp = get_image_page(GFP_ATOMIC, PG_SAFE);
+ if (!lp) {
+ error = -ENOMEM;
+ goto Free;
+ }
+ lp->next = sp_list;
+ sp_list = lp;
+ nr_pages--;
+ }
+ /* Preallocate memory for the image */
+ safe_pages_list = NULL;
+ nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
+ while (nr_pages > 0) {
+ lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
+ if (!lp) {
+ error = -ENOMEM;
+ goto Free;
+ }
+ if (!swsusp_page_is_free(virt_to_page(lp))) {
+ /* The page is "safe", add it to the list */
+ lp->next = safe_pages_list;
+ safe_pages_list = lp;
+ }
+ /* Mark the page as allocated */
+ swsusp_set_page_forbidden(virt_to_page(lp));
+ swsusp_set_page_free(virt_to_page(lp));
+ nr_pages--;
+ }
+ /* Free the reserved safe pages so that chain_alloc() can use them */
+ while (sp_list) {
+ lp = sp_list->next;
+ free_image_page(sp_list, PG_UNSAFE_CLEAR);
+ sp_list = lp;
+ }
+ return 0;
+
+ Free:
+ swsusp_free();
+ return error;
+}
+
+/**
+ * get_buffer - compute the address that snapshot_write_next() should
+ * set for its caller to write to.
+ */
+
+static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
+{
+ struct pbe *pbe;
+ struct page *page;
+ unsigned long pfn = memory_bm_next_pfn(bm, 0);
+
+ if (pfn == BM_END_OF_MAP)
+ return ERR_PTR(-EFAULT);
+
+ page = pfn_to_page(pfn);
+ if (PageHighMem(page))
+ return get_highmem_page_buffer(page, ca);
+
+ if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page))
+ /* We have allocated the "original" page frame and we can
+ * use it directly to store the loaded page.
+ */
+ return page_address(page);
+
+ /* The "original" page frame has not been allocated and we have to
+ * use a "safe" page frame to store the loaded page.
+ */
+ pbe = chain_alloc(ca, sizeof(struct pbe));
+ if (!pbe) {
+ swsusp_free();
+ return ERR_PTR(-ENOMEM);
+ }
+ pbe->orig_address = page_address(page);
+ pbe->address = safe_pages_list;
+ safe_pages_list = safe_pages_list->next;
+ pbe->next = restore_pblist;
+ restore_pblist = pbe;
+ return pbe->address;
+}
+
+/**
+ * snapshot_write_next - used for writing the system memory snapshot.
+ *
+ * On the first call to it @handle should point to a zeroed
+ * snapshot_handle structure. The structure gets updated and a pointer
+ * to it should be passed to this function on every subsequent call.
+ *
+ * On success the function returns a positive number. Then, the caller
+ * is allowed to write up to the returned number of bytes to the memory
+ * location computed by the data_of() macro.
+ *
+ * The function returns 0 to indicate the "end of file" condition,
+ * and a negative number is returned on error. In such cases the
+ * structure pointed to by @handle is not updated and should not be used
+ * any more.
+ */
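+/*
+ * Illustrative usage sketch (not part of the original documentation): the
+ * image readers in swap.c loop along the lines of
+ *
+ *	while ((n = snapshot_write_next(&handle)) > 0)
+ *		read_in(handle.buffer, n);
+ *
+ * where read_in() is a hypothetical source filling handle.buffer, and
+ * handle.sync_read tells the caller whether the requested data must be
+ * read synchronously before the next call.
+ */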
+
+int snapshot_write_next(struct snapshot_handle *handle)
+{
+ static struct chain_allocator ca;
+ int error = 0;
+
+ /* Check if we have already loaded the entire image */
+ if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
+ return 0;
+
+ handle->sync_read = 1;
+
+ if (!handle->cur) {
+ if (!buffer)
+ /* This makes the buffer be freed by swsusp_free() */
+ buffer = get_image_page(GFP_ATOMIC, PG_ANY);
+
+ if (!buffer)
+ return -ENOMEM;
+
+ handle->buffer = buffer;
+ } else if (handle->cur == 1) {
+ error = load_header(buffer);
+ if (error)
+ return error;
+
+ error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
+ if (error)
+ return error;
+
+ /* Allocate buffer for page keys. */
+ error = page_key_alloc(nr_copy_pages);
+ if (error)
+ return error;
+
+ } else if (handle->cur <= nr_meta_pages + 1) {
+ error = unpack_orig_pfns(buffer, &copy_bm);
+ if (error)
+ return error;
+
+ if (handle->cur == nr_meta_pages + 1) {
+ error = prepare_image(&orig_bm, &copy_bm);
+ if (error)
+ return error;
+
+ chain_init(&ca, GFP_ATOMIC, PG_SAFE);
+ memory_bm_position_reset(&orig_bm);
+ restore_pblist = NULL;
+ handle->buffer = get_buffer(&orig_bm, &ca);
+ handle->sync_read = 0;
+ if (IS_ERR(handle->buffer))
+ return PTR_ERR(handle->buffer);
+ }
+ } else {
+ copy_last_highmem_page();
+ /* Restore page key for data page (s390 only). */
+ page_key_write(handle->buffer);
+ handle->buffer = get_buffer(&orig_bm, &ca);
+ if (IS_ERR(handle->buffer))
+ return PTR_ERR(handle->buffer);
+ if (handle->buffer != buffer)
+ handle->sync_read = 0;
+ }
+ handle->cur++;
+ return PAGE_SIZE;
+}
+
+/**
+ * snapshot_write_finalize - must be called after the last call to
+ * snapshot_write_next() in case the last page in the image happens
+ * to be a highmem page whose contents should be stored in
+ * highmem. Additionally, it releases the memory that will not be
+ * used any more.
+ */
+
+void snapshot_write_finalize(struct snapshot_handle *handle)
+{
+ copy_last_highmem_page();
+ /* Restore page key for data page (s390 only). */
+ page_key_write(handle->buffer);
+ page_key_free();
+ /* Free only if we have loaded the image entirely */
+ if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
+ memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR);
+ free_highmem_data();
+ }
+}
+
+int snapshot_image_loaded(struct snapshot_handle *handle)
+{
+ return !(!nr_copy_pages || !last_highmem_page_copied() ||
+ handle->cur <= nr_meta_pages + nr_copy_pages);
+}
+
+#ifdef CONFIG_HIGHMEM
+/* Assumes that @buf is ready and points to a "safe" page */
+static inline void
+swap_two_pages_data(struct page *p1, struct page *p2, void *buf)
+{
+ void *kaddr1, *kaddr2;
+
+ kaddr1 = kmap_atomic(p1);
+ kaddr2 = kmap_atomic(p2);
+ copy_page(buf, kaddr1);
+ copy_page(kaddr1, kaddr2);
+ copy_page(kaddr2, buf);
+ kunmap_atomic(kaddr2);
+ kunmap_atomic(kaddr1);
+}
+
+/**
+ * restore_highmem - for each highmem page that was allocated before
+ * the suspend and included in the suspend image, and also has been
+ * allocated by the "resume" kernel swap its current (ie. "before
+ * resume") contents with the previous (ie. "before suspend") one.
+ *
+ * If the resume eventually fails, we can call this function once
+ * again and restore the "before resume" highmem state.
+ */
+
+int restore_highmem(void)
+{
+ struct highmem_pbe *pbe = highmem_pblist;
+ void *buf;
+
+ if (!pbe)
+ return 0;
+
+ buf = get_image_page(GFP_ATOMIC, PG_SAFE);
+ if (!buf)
+ return -ENOMEM;
+
+ while (pbe) {
+ swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf);
+ pbe = pbe->next;
+ }
+ free_image_page(buf, PG_UNSAFE_CLEAR);
+ return 0;
+}
+#endif /* CONFIG_HIGHMEM */
+
+struct memory_bitmap *pageset1_map, *pageset2_map, *free_map, *nosave_map,
+ *pageset1_copy_map, *io_map, *page_resave_map, *compare_map;
+
+int resume_attempted;
+
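+/*
+ * TuxOnIce helpers: stream every node page of @bm through the caller
+ * supplied rw_chunk() callback, one PAGE_SIZE chunk per radix tree node.
+ */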
+int memory_bm_write(struct memory_bitmap *bm, int (*rw_chunk)
+ (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size))
+{
+ int result;
+
+ memory_bm_position_reset(bm);
+
+ do {
+ result = rw_chunk(WRITE, NULL, (char *) bm->cur[0].node->data, PAGE_SIZE);
+
+ if (result)
+ return result;
+ } while (rtree_next_node(bm, 0));
+ return 0;
+}
+
+int memory_bm_read(struct memory_bitmap *bm, int (*rw_chunk)
+ (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size))
+{
+ int result;
+
+ memory_bm_position_reset(bm);
+
+ do {
+ result = rw_chunk(READ, NULL, (char *) bm->cur[0].node->data, PAGE_SIZE);
+
+ if (result)
+ return result;
+
+ } while (rtree_next_node(bm, 0));
+ return 0;
+}
+
+int memory_bm_space_needed(struct memory_bitmap *bm)
+{
+ unsigned long bytes = 0;
+
+ memory_bm_position_reset(bm);
+ do {
+ bytes += PAGE_SIZE;
+ } while (rtree_next_node(bm, 0));
+ return bytes;
+}
+
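+/*
+ * Allocate and initialise a memory bitmap for TuxOnIce; release it again
+ * with toi_free_bitmap().
+ */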
+int toi_alloc_bitmap(struct memory_bitmap **bm)
+{
+ int error;
+ struct memory_bitmap *bm1;
+
+ bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
+ if (!bm1)
+ return -ENOMEM;
+
+ error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY);
+ if (error) {
+ printk("Error returned - %d.\n", error);
+ kfree(bm1);
+ return -ENOMEM;
+ }
+
+ *bm = bm1;
+ return 0;
+}
+
+void toi_free_bitmap(struct memory_bitmap **bm)
+{
+ if (!*bm)
+ return;
+
+ memory_bm_free(*bm, 0);
+ kfree(*bm);
+ *bm = NULL;
+}
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
new file mode 100644
index 000000000..8d7a1ef72
--- /dev/null
+++ b/kernel/power/suspend.c
@@ -0,0 +1,536 @@
+/*
+ * kernel/power/suspend.c - Suspend to RAM and standby functionality.
+ *
+ * Copyright (c) 2003 Patrick Mochel
+ * Copyright (c) 2003 Open Source Development Lab
+ * Copyright (c) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/console.h>
+#include <linux/cpu.h>
+#include <linux/cpuidle.h>
+#include <linux/syscalls.h>
+#include <linux/gfp.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/suspend.h>
+#include <linux/syscore_ops.h>
+#include <linux/ftrace.h>
+#include <trace/events/power.h>
+#include <linux/compiler.h>
+#include <linux/moduleparam.h>
+
+#include "power.h"
+
+const char *pm_labels[] = { "mem", "standby", "freeze", NULL };
+const char *pm_states[PM_SUSPEND_MAX];
+
+static const struct platform_suspend_ops *suspend_ops;
+static const struct platform_freeze_ops *freeze_ops;
+static DECLARE_WAIT_QUEUE_HEAD(suspend_freeze_wait_head);
+
+enum freeze_state __read_mostly suspend_freeze_state;
+static DEFINE_SPINLOCK(suspend_freeze_lock);
+
+void freeze_set_ops(const struct platform_freeze_ops *ops)
+{
+ lock_system_sleep();
+ freeze_ops = ops;
+ unlock_system_sleep();
+}
+
+static void freeze_begin(void)
+{
+ suspend_freeze_state = FREEZE_STATE_NONE;
+}
+
+static void freeze_enter(void)
+{
+ spin_lock_irq(&suspend_freeze_lock);
+ if (pm_wakeup_pending())
+ goto out;
+
+ suspend_freeze_state = FREEZE_STATE_ENTER;
+ spin_unlock_irq(&suspend_freeze_lock);
+
+ get_online_cpus();
+ cpuidle_resume();
+
+ /* Push all the CPUs into the idle loop. */
+ wake_up_all_idle_cpus();
+ pr_debug("PM: suspend-to-idle\n");
+ /* Make the current CPU wait so it can enter the idle loop too. */
+ wait_event(suspend_freeze_wait_head,
+ suspend_freeze_state == FREEZE_STATE_WAKE);
+ pr_debug("PM: resume from suspend-to-idle\n");
+
+ cpuidle_pause();
+ put_online_cpus();
+
+ spin_lock_irq(&suspend_freeze_lock);
+
+ out:
+ suspend_freeze_state = FREEZE_STATE_NONE;
+ spin_unlock_irq(&suspend_freeze_lock);
+}
+
+void freeze_wake(void)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&suspend_freeze_lock, flags);
+ if (suspend_freeze_state > FREEZE_STATE_NONE) {
+ suspend_freeze_state = FREEZE_STATE_WAKE;
+ wake_up(&suspend_freeze_wait_head);
+ }
+ spin_unlock_irqrestore(&suspend_freeze_lock, flags);
+}
+EXPORT_SYMBOL_GPL(freeze_wake);
+
+static bool valid_state(suspend_state_t state)
+{
+ /*
+ * PM_SUSPEND_STANDBY and PM_SUSPEND_MEM states need low level
+ * support and need to be valid to the low level
+ * implementation; no valid() callback implies that none are valid.
+ */
+ return suspend_ops && suspend_ops->valid && suspend_ops->valid(state);
+}
+
+/*
+ * If this is set, the "mem" label always corresponds to the deepest sleep state
+ * available, the "standby" label corresponds to the second deepest sleep state
+ * available (if any), and the "freeze" label corresponds to the remaining
+ * available sleep state (if there is one).
+ */
+static bool relative_states;
+
+static int __init sleep_states_setup(char *str)
+{
+ relative_states = !strncmp(str, "1", 1);
+ pm_states[PM_SUSPEND_FREEZE] = pm_labels[relative_states ? 0 : 2];
+ return 1;
+}
+
+__setup("relative_sleep_states=", sleep_states_setup);
+
+/**
+ * suspend_set_ops - Set the global suspend method table.
+ * @ops: Suspend operations to use.
+ */
+void suspend_set_ops(const struct platform_suspend_ops *ops)
+{
+ suspend_state_t i;
+ int j = 0;
+
+ lock_system_sleep();
+
+ suspend_ops = ops;
+ for (i = PM_SUSPEND_MEM; i >= PM_SUSPEND_STANDBY; i--)
+ if (valid_state(i)) {
+ pm_states[i] = pm_labels[j++];
+ } else if (!relative_states) {
+ pm_states[i] = NULL;
+ j++;
+ }
+
+ pm_states[PM_SUSPEND_FREEZE] = pm_labels[j];
+
+ unlock_system_sleep();
+}
+EXPORT_SYMBOL_GPL(suspend_set_ops);
+
+/**
+ * suspend_valid_only_mem - Generic memory-only valid callback.
+ *
+ * Platform drivers that implement mem suspend only and only need to check for
+ * that in their .valid() callback can use this instead of rolling their own
+ * .valid() callback.
+ */
+int suspend_valid_only_mem(suspend_state_t state)
+{
+ return state == PM_SUSPEND_MEM;
+}
+EXPORT_SYMBOL_GPL(suspend_valid_only_mem);
+
+static bool sleep_state_supported(suspend_state_t state)
+{
+ return state == PM_SUSPEND_FREEZE || (suspend_ops && suspend_ops->enter);
+}
+
+static int platform_suspend_prepare(suspend_state_t state)
+{
+ return state != PM_SUSPEND_FREEZE && suspend_ops->prepare ?
+ suspend_ops->prepare() : 0;
+}
+
+static int platform_suspend_prepare_late(suspend_state_t state)
+{
+ return state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->prepare ?
+ freeze_ops->prepare() : 0;
+}
+
+static int platform_suspend_prepare_noirq(suspend_state_t state)
+{
+ return state != PM_SUSPEND_FREEZE && suspend_ops->prepare_late ?
+ suspend_ops->prepare_late() : 0;
+}
+
+static void platform_resume_noirq(suspend_state_t state)
+{
+ if (state != PM_SUSPEND_FREEZE && suspend_ops->wake)
+ suspend_ops->wake();
+}
+
+static void platform_resume_early(suspend_state_t state)
+{
+ if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->restore)
+ freeze_ops->restore();
+}
+
+static void platform_resume_finish(suspend_state_t state)
+{
+ if (state != PM_SUSPEND_FREEZE && suspend_ops->finish)
+ suspend_ops->finish();
+}
+
+static int platform_suspend_begin(suspend_state_t state)
+{
+ if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->begin)
+ return freeze_ops->begin();
+ else if (suspend_ops->begin)
+ return suspend_ops->begin(state);
+ else
+ return 0;
+}
+
+static void platform_resume_end(suspend_state_t state)
+{
+ if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->end)
+ freeze_ops->end();
+ else if (suspend_ops->end)
+ suspend_ops->end();
+}
+
+static void platform_recover(suspend_state_t state)
+{
+ if (state != PM_SUSPEND_FREEZE && suspend_ops->recover)
+ suspend_ops->recover();
+}
+
+static bool platform_suspend_again(suspend_state_t state)
+{
+ return state != PM_SUSPEND_FREEZE && suspend_ops->suspend_again ?
+ suspend_ops->suspend_again() : false;
+}
+
+#ifdef CONFIG_PM_DEBUG
+static unsigned int pm_test_delay = 5;
+module_param(pm_test_delay, uint, 0644);
+MODULE_PARM_DESC(pm_test_delay,
+ "Number of seconds to wait before resuming from suspend test");
+#endif
+
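+/*
+ * If the configured PM test level matches @level, wait pm_test_delay
+ * seconds and return 1 so that the caller skips the remaining suspend
+ * steps.
+ */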
+static int suspend_test(int level)
+{
+#ifdef CONFIG_PM_DEBUG
+ if (pm_test_level == level) {
+ printk(KERN_INFO "suspend debug: Waiting for %d second(s).\n",
+ pm_test_delay);
+ mdelay(pm_test_delay * 1000);
+ return 1;
+ }
+#endif /* !CONFIG_PM_DEBUG */
+ return 0;
+}
+
+/**
+ * suspend_prepare - Prepare for entering system sleep state.
+ *
+ * Common code run for every system sleep state that can be entered (except for
+ * hibernation). Run suspend notifiers, allocate the "suspend" console and
+ * freeze processes.
+ */
+static int suspend_prepare(suspend_state_t state)
+{
+ int error;
+
+ if (!sleep_state_supported(state))
+ return -EPERM;
+
+ pm_prepare_console();
+
+ error = pm_notifier_call_chain(PM_SUSPEND_PREPARE);
+ if (error)
+ goto Finish;
+
+ trace_suspend_resume(TPS("freeze_processes"), 0, true);
+ error = suspend_freeze_processes();
+ trace_suspend_resume(TPS("freeze_processes"), 0, false);
+ if (!error)
+ return 0;
+
+ suspend_stats.failed_freeze++;
+ dpm_save_failed_step(SUSPEND_FREEZE);
+ Finish:
+ pm_notifier_call_chain(PM_POST_SUSPEND);
+ pm_restore_console();
+ return error;
+}
+
+/* default implementation */
+void __weak arch_suspend_disable_irqs(void)
+{
+ local_irq_disable();
+}
+
+/* default implementation */
+void __weak arch_suspend_enable_irqs(void)
+{
+ local_irq_enable();
+}
+
+/**
+ * suspend_enter - Make the system enter the given sleep state.
+ * @state: System sleep state to enter.
+ * @wakeup: Returns information that the sleep state should not be re-entered.
+ *
+ * This function should be called after devices have been suspended.
+ */
+static int suspend_enter(suspend_state_t state, bool *wakeup)
+{
+ int error;
+
+ error = platform_suspend_prepare(state);
+ if (error)
+ goto Platform_finish;
+
+ error = dpm_suspend_late(PMSG_SUSPEND);
+ if (error) {
+ printk(KERN_ERR "PM: late suspend of devices failed\n");
+ goto Platform_finish;
+ }
+ error = platform_suspend_prepare_late(state);
+ if (error)
+ goto Devices_early_resume;
+
+ error = dpm_suspend_noirq(PMSG_SUSPEND);
+ if (error) {
+ printk(KERN_ERR "PM: noirq suspend of devices failed\n");
+ goto Platform_early_resume;
+ }
+ error = platform_suspend_prepare_noirq(state);
+ if (error)
+ goto Platform_wake;
+
+ if (suspend_test(TEST_PLATFORM))
+ goto Platform_wake;
+
+ /*
+ * PM_SUSPEND_FREEZE equals
+ * frozen processes + suspended devices + idle processors.
+ * Thus we should invoke freeze_enter() soon after
+ * all the devices are suspended.
+ */
+ if (state == PM_SUSPEND_FREEZE) {
+ trace_suspend_resume(TPS("machine_suspend"), state, true);
+ freeze_enter();
+ trace_suspend_resume(TPS("machine_suspend"), state, false);
+ goto Platform_wake;
+ }
+
+ error = disable_nonboot_cpus();
+ if (error || suspend_test(TEST_CPUS))
+ goto Enable_cpus;
+
+ arch_suspend_disable_irqs();
+ BUG_ON(!irqs_disabled());
+
+ error = syscore_suspend();
+ if (!error) {
+ *wakeup = pm_wakeup_pending();
+ if (!(suspend_test(TEST_CORE) || *wakeup)) {
+ trace_suspend_resume(TPS("machine_suspend"),
+ state, true);
+ error = suspend_ops->enter(state);
+ trace_suspend_resume(TPS("machine_suspend"),
+ state, false);
+ events_check_enabled = false;
+ }
+ syscore_resume();
+ }
+
+ arch_suspend_enable_irqs();
+ BUG_ON(irqs_disabled());
+
+ Enable_cpus:
+ enable_nonboot_cpus();
+
+ Platform_wake:
+ platform_resume_noirq(state);
+ dpm_resume_noirq(PMSG_RESUME);
+
+ Platform_early_resume:
+ platform_resume_early(state);
+
+ Devices_early_resume:
+ dpm_resume_early(PMSG_RESUME);
+
+ Platform_finish:
+ platform_resume_finish(state);
+ return error;
+}
+
+/**
+ * suspend_devices_and_enter - Suspend devices and enter system sleep state.
+ * @state: System sleep state to enter.
+ */
+int suspend_devices_and_enter(suspend_state_t state)
+{
+ int error;
+ bool wakeup = false;
+
+ if (!sleep_state_supported(state))
+ return -ENOSYS;
+
+ error = platform_suspend_begin(state);
+ if (error)
+ goto Close;
+
+ suspend_console();
+ suspend_test_start();
+ error = dpm_suspend_start(PMSG_SUSPEND);
+ if (error) {
+ pr_err("PM: Some devices failed to suspend, or early wake event detected\n");
+ goto Recover_platform;
+ }
+ suspend_test_finish("suspend devices");
+ if (suspend_test(TEST_DEVICES))
+ goto Recover_platform;
+
+ do {
+ error = suspend_enter(state, &wakeup);
+ } while (!error && !wakeup && platform_suspend_again(state));
+
+ Resume_devices:
+ suspend_test_start();
+ dpm_resume_end(PMSG_RESUME);
+ suspend_test_finish("resume devices");
+ trace_suspend_resume(TPS("resume_console"), state, true);
+ resume_console();
+ trace_suspend_resume(TPS("resume_console"), state, false);
+
+ Close:
+ platform_resume_end(state);
+ return error;
+
+ Recover_platform:
+ platform_recover(state);
+ goto Resume_devices;
+}
+
+/**
+ * suspend_finish - Clean up before finishing the suspend sequence.
+ *
+ * Call platform code to clean up, restart processes, and free the console that
+ * we've allocated. This routine is not called for hibernation.
+ */
+static void suspend_finish(void)
+{
+ suspend_thaw_processes();
+ pm_notifier_call_chain(PM_POST_SUSPEND);
+ pm_restore_console();
+}
+
+/**
+ * enter_state - Do common work needed to enter system sleep state.
+ * @state: System sleep state to enter.
+ *
+ * Make sure that no one else is trying to put the system into a sleep state.
+ * Fail if that's not the case. Otherwise, prepare for system suspend, make the
+ * system enter the given sleep state and clean up after wakeup.
+ */
+static int enter_state(suspend_state_t state)
+{
+ int error;
+
+ trace_suspend_resume(TPS("suspend_enter"), state, true);
+ if (state == PM_SUSPEND_FREEZE) {
+#ifdef CONFIG_PM_DEBUG
+ if (pm_test_level != TEST_NONE && pm_test_level <= TEST_CPUS) {
+ pr_warning("PM: Unsupported test mode for freeze state,"
+ "please choose none/freezer/devices/platform.\n");
+ return -EAGAIN;
+ }
+#endif
+ } else if (!valid_state(state)) {
+ return -EINVAL;
+ }
+ if (!mutex_trylock(&pm_mutex))
+ return -EBUSY;
+
+ if (state == PM_SUSPEND_FREEZE)
+ freeze_begin();
+
+ trace_suspend_resume(TPS("sync_filesystems"), 0, true);
+ printk(KERN_INFO "PM: Syncing filesystems ... ");
+ sys_sync();
+ printk("done.\n");
+ trace_suspend_resume(TPS("sync_filesystems"), 0, false);
+
+ pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
+ error = suspend_prepare(state);
+ if (error)
+ goto Unlock;
+
+ if (suspend_test(TEST_FREEZER))
+ goto Finish;
+
+ trace_suspend_resume(TPS("suspend_enter"), state, false);
+ pr_debug("PM: Entering %s sleep\n", pm_states[state]);
+ pm_restrict_gfp_mask();
+ error = suspend_devices_and_enter(state);
+ pm_restore_gfp_mask();
+
+ Finish:
+ pr_debug("PM: Finishing wakeup.\n");
+ suspend_finish();
+ Unlock:
+ mutex_unlock(&pm_mutex);
+ return error;
+}
+
+/**
+ * pm_suspend - Externally visible function for suspending the system.
+ * @state: System sleep state to enter.
+ *
+ * Check if the value of @state represents one of the supported states,
+ * execute enter_state() and update system suspend statistics.
+ */
+int pm_suspend(suspend_state_t state)
+{
+ int error;
+
+ if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX)
+ return -EINVAL;
+
+ error = enter_state(state);
+ if (error) {
+ suspend_stats.fail++;
+ dpm_save_failed_errno(error);
+ } else {
+ suspend_stats.success++;
+ }
+ return error;
+}
+EXPORT_SYMBOL(pm_suspend);
diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c
new file mode 100644
index 000000000..084452e34
--- /dev/null
+++ b/kernel/power/suspend_test.c
@@ -0,0 +1,218 @@
+/*
+ * kernel/power/suspend_test.c - Suspend to RAM and standby test facility.
+ *
+ * Copyright (c) 2009 Pavel Machek <pavel@ucw.cz>
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/init.h>
+#include <linux/rtc.h>
+
+#include "power.h"
+
+/*
+ * We test the system suspend code by setting an RTC wakealarm a short
+ * time in the future, then suspending. Suspending the devices won't
+ * normally take long ... some systems only need a few milliseconds.
+ *
+ * The time it takes is system-specific though, so when we test this
+ * during system bootup we allow a LOT of time.
+ */
+#define TEST_SUSPEND_SECONDS 10
+
+static unsigned long suspend_test_start_time;
+static u32 test_repeat_count_max = 1;
+static u32 test_repeat_count_current;
+
+void suspend_test_start(void)
+{
+ /* FIXME Use better timebase than "jiffies", ideally a clocksource.
+ * What we want is a hardware counter that will work correctly even
+ * during the irqs-are-off stages of the suspend/resume cycle...
+ */
+ suspend_test_start_time = jiffies;
+}
+
+void suspend_test_finish(const char *label)
+{
+ long nj = jiffies - suspend_test_start_time;
+ unsigned msec;
+
+ msec = jiffies_to_msecs(abs(nj));
+ pr_info("PM: %s took %d.%03d seconds\n", label,
+ msec / 1000, msec % 1000);
+
+ /* Warning on suspend means the RTC alarm period needs to be
+ * larger -- the system was sooo slooowwww to suspend that the
+ * alarm (should have) fired before the system went to sleep!
+ *
+ * Warning on either suspend or resume also means the system
+ * has some performance issues. The stack dump of a WARN_ON
+ * is more likely to get the right attention than a printk...
+ */
+ WARN(msec > (TEST_SUSPEND_SECONDS * 1000),
+ "Component: %s, time: %u\n", label, msec);
+}
+
+/*
+ * To test system suspend, we need a hands-off mechanism to resume the
+ * system. RTCs wake alarms are a common self-contained mechanism.
+ */
+
+static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state)
+{
+ static char err_readtime[] __initdata =
+ KERN_ERR "PM: can't read %s time, err %d\n";
+ static char err_wakealarm [] __initdata =
+ KERN_ERR "PM: can't set %s wakealarm, err %d\n";
+ static char err_suspend[] __initdata =
+ KERN_ERR "PM: suspend test failed, error %d\n";
+ static char info_test[] __initdata =
+ KERN_INFO "PM: test RTC wakeup from '%s' suspend\n";
+
+ unsigned long now;
+ struct rtc_wkalrm alm;
+ int status;
+
+ /* this may fail if the RTC hasn't been initialized */
+repeat:
+ status = rtc_read_time(rtc, &alm.time);
+ if (status < 0) {
+ printk(err_readtime, dev_name(&rtc->dev), status);
+ return;
+ }
+ rtc_tm_to_time(&alm.time, &now);
+
+ memset(&alm, 0, sizeof alm);
+ rtc_time_to_tm(now + TEST_SUSPEND_SECONDS, &alm.time);
+ alm.enabled = true;
+
+ status = rtc_set_alarm(rtc, &alm);
+ if (status < 0) {
+ printk(err_wakealarm, dev_name(&rtc->dev), status);
+ return;
+ }
+
+ if (state == PM_SUSPEND_MEM) {
+ printk(info_test, pm_states[state]);
+ status = pm_suspend(state);
+ if (status == -ENODEV)
+ state = PM_SUSPEND_STANDBY;
+ }
+ if (state == PM_SUSPEND_STANDBY) {
+ printk(info_test, pm_states[state]);
+ status = pm_suspend(state);
+ if (status < 0)
+ state = PM_SUSPEND_FREEZE;
+ }
+ if (state == PM_SUSPEND_FREEZE) {
+ printk(info_test, pm_states[state]);
+ status = pm_suspend(state);
+ }
+
+ if (status < 0)
+ printk(err_suspend, status);
+
+ test_repeat_count_current++;
+ if (test_repeat_count_current < test_repeat_count_max)
+ goto repeat;
+
+ /* Some platforms can't detect that the alarm triggered the
+ * wakeup, or (accordingly) disable it afterwards.
+ * It's supposed to give oneshot behavior; cope.
+ */
+ alm.enabled = false;
+ rtc_set_alarm(rtc, &alm);
+}
+
+static int __init has_wakealarm(struct device *dev, const void *data)
+{
+ struct rtc_device *candidate = to_rtc_device(dev);
+
+ if (!candidate->ops->set_alarm)
+ return 0;
+ if (!device_may_wakeup(candidate->dev.parent))
+ return 0;
+
+ return 1;
+}
+
+/*
+ * Kernel options like "test_suspend=mem" force suspend/resume sanity tests
+ * at startup time. They're normally disabled, for faster boot and because
+ * we can't know which states really work on this particular system.
+ */
+static const char *test_state_label __initdata;
+
+static char warn_bad_state[] __initdata =
+ KERN_WARNING "PM: can't test '%s' suspend state\n";
+
+static int __init setup_test_suspend(char *value)
+{
+ int i;
+ char *repeat;
+ char *suspend_type;
+
+ /* example : "=mem[,N]" ==> "mem[,N]" */
+ value++;
+ suspend_type = strsep(&value, ",");
+ if (!suspend_type)
+ return 0;
+
+ repeat = strsep(&value, ",");
+ if (repeat) {
+ if (kstrtou32(repeat, 0, &test_repeat_count_max))
+ return 0;
+ }
+
+ for (i = 0; pm_labels[i]; i++)
+ if (!strcmp(pm_labels[i], suspend_type)) {
+ test_state_label = pm_labels[i];
+ return 0;
+ }
+
+ printk(warn_bad_state, suspend_type);
+ return 0;
+}
+__setup("test_suspend", setup_test_suspend);
+
+static int __init test_suspend(void)
+{
+ static char warn_no_rtc[] __initdata =
+ KERN_WARNING "PM: no wakealarm-capable RTC driver is ready\n";
+
+ struct rtc_device *rtc = NULL;
+ struct device *dev;
+ suspend_state_t test_state;
+
+ /* PM is initialized by now; is that state testable? */
+ if (!test_state_label)
+ return 0;
+
+ for (test_state = PM_SUSPEND_MIN; test_state < PM_SUSPEND_MAX; test_state++) {
+ const char *state_label = pm_states[test_state];
+
+ if (state_label && !strcmp(test_state_label, state_label))
+ break;
+ }
+ if (test_state == PM_SUSPEND_MAX) {
+ printk(warn_bad_state, test_state_label);
+ return 0;
+ }
+
+ /* RTCs have been initialized by now too ... can we use one? */
+ dev = class_find_device(rtc_class, NULL, NULL, has_wakealarm);
+ if (dev)
+ rtc = rtc_class_open(dev_name(dev));
+ if (!rtc) {
+ printk(warn_no_rtc);
+ return 0;
+ }
+
+ /* go for it */
+ test_wakealarm(rtc, test_state);
+ rtc_class_close(rtc);
+ return 0;
+}
+late_initcall(test_suspend);
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
new file mode 100644
index 000000000..570aff817
--- /dev/null
+++ b/kernel/power/swap.c
@@ -0,0 +1,1512 @@
+/*
+ * linux/kernel/power/swap.c
+ *
+ * This file provides functions for reading the suspend image from
+ * and writing it to a swap partition.
+ *
+ * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz>
+ * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
+ * Copyright (C) 2010-2012 Bojan Smojver <bojan@rexursive.com>
+ *
+ * This file is released under the GPLv2.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/file.h>
+#include <linux/delay.h>
+#include <linux/bitops.h>
+#include <linux/genhd.h>
+#include <linux/device.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+#include <linux/pm.h>
+#include <linux/slab.h>
+#include <linux/lzo.h>
+#include <linux/vmalloc.h>
+#include <linux/cpumask.h>
+#include <linux/atomic.h>
+#include <linux/kthread.h>
+#include <linux/crc32.h>
+#include <linux/ktime.h>
+
+#include "power.h"
+
+#define HIBERNATE_SIG "S1SUSPEND"
+
+/*
+ * The swap map is a data structure used for keeping track of each page
+ * written to a swap partition. It consists of many swap_map_page
+ * structures, each containing an array of MAP_PAGE_ENTRIES swap entries.
+ * These structures are stored on the swap and linked together with the
+ * help of the .next_swap member.
+ *
+ * The swap map is created during suspend. The swap map pages are
+ * allocated and populated one at a time, so we only need one memory
+ * page to set up the entire structure.
+ *
+ * During resume we read all the swap_map_page structures into a list.
+ */
+
+#define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1)
+
+/*
+ * Number of free pages that are not high.
+ */
+static inline unsigned long low_free_pages(void)
+{
+ return nr_free_pages() - nr_free_highpages();
+}
+
+/*
+ * Number of pages required to be kept free while writing the image. Always
+ * half of all available low pages before the writing starts.
+ */
+static inline unsigned long reqd_free_pages(void)
+{
+ return low_free_pages() / 2;
+}
+
+struct swap_map_page {
+ sector_t entries[MAP_PAGE_ENTRIES];
+ sector_t next_swap;
+};
+
+struct swap_map_page_list {
+ struct swap_map_page *map;
+ struct swap_map_page_list *next;
+};
+
+/**
+ * The swap_map_handle structure is used for handling swap in
+ * a file-like way.
+ */
+
+struct swap_map_handle {
+ struct swap_map_page *cur;
+ struct swap_map_page_list *maps;
+ sector_t cur_swap;
+ sector_t first_sector;
+ unsigned int k;
+ unsigned long reqd_free_pages;
+ u32 crc32;
+};
+
+struct swsusp_header {
+ char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int) -
+ sizeof(u32)];
+ u32 crc32;
+ sector_t image;
+ unsigned int flags; /* Flags to pass to the "boot" kernel */
+ char orig_sig[10];
+ char sig[10];
+} __packed;
+
+static struct swsusp_header *swsusp_header;
+
+/**
+ * The following functions are used for tracking the allocated
+ * swap pages, so that they can be freed in case of an error.
+ */
+
+struct swsusp_extent {
+ struct rb_node node;
+ unsigned long start;
+ unsigned long end;
+};
+
+static struct rb_root swsusp_extents = RB_ROOT;
+
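+/*
+ * Record @swap_offset in the rb-tree of allocated swap extents, merging it
+ * into an adjacent extent when possible. Returns -EINVAL if the offset is
+ * already registered.
+ */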
+static int swsusp_extents_insert(unsigned long swap_offset)
+{
+ struct rb_node **new = &(swsusp_extents.rb_node);
+ struct rb_node *parent = NULL;
+ struct swsusp_extent *ext;
+
+ /* Figure out where to put the new node */
+ while (*new) {
+ ext = rb_entry(*new, struct swsusp_extent, node);
+ parent = *new;
+ if (swap_offset < ext->start) {
+ /* Try to merge */
+ if (swap_offset == ext->start - 1) {
+ ext->start--;
+ return 0;
+ }
+ new = &((*new)->rb_left);
+ } else if (swap_offset > ext->end) {
+ /* Try to merge */
+ if (swap_offset == ext->end + 1) {
+ ext->end++;
+ return 0;
+ }
+ new = &((*new)->rb_right);
+ } else {
+ /* It already is in the tree */
+ return -EINVAL;
+ }
+ }
+ /* Add the new node and rebalance the tree. */
+ ext = kzalloc(sizeof(struct swsusp_extent), GFP_KERNEL);
+ if (!ext)
+ return -ENOMEM;
+
+ ext->start = swap_offset;
+ ext->end = swap_offset;
+ rb_link_node(&ext->node, parent, new);
+ rb_insert_color(&ext->node, &swsusp_extents);
+ return 0;
+}
+
+/**
+ * alloc_swapdev_block - allocate a swap page and register that it has
+ * been allocated, so that it can be freed in case of an error.
+ */
+
+sector_t alloc_swapdev_block(int swap)
+{
+ unsigned long offset;
+
+ offset = swp_offset(get_swap_page_of_type(swap));
+ if (offset) {
+ if (swsusp_extents_insert(offset))
+ swap_free(swp_entry(swap, offset));
+ else
+ return swapdev_block(swap, offset);
+ }
+ return 0;
+}
+
+/**
+ * free_all_swap_pages - free swap pages allocated for saving image data.
+ * It also frees the extents used to register which swap entries had been
+ * allocated.
+ */
+
+void free_all_swap_pages(int swap)
+{
+ struct rb_node *node;
+
+ while ((node = swsusp_extents.rb_node)) {
+ struct swsusp_extent *ext;
+ unsigned long offset;
+
+ ext = container_of(node, struct swsusp_extent, node);
+ rb_erase(node, &swsusp_extents);
+ for (offset = ext->start; offset <= ext->end; offset++)
+ swap_free(swp_entry(swap, offset));
+
+ kfree(ext);
+ }
+}
+
+int swsusp_swap_in_use(void)
+{
+ return (swsusp_extents.rb_node != NULL);
+}
+
+/*
+ * General things
+ */
+
+static unsigned short root_swap = 0xffff;
+struct block_device *hib_resume_bdev;
+
+/*
+ * Saving part
+ */
+
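+/*
+ * mark_swapfiles - write HIBERNATE_SIG into the swap header, preserving
+ * the original signature in orig_sig, and record the first image sector
+ * and the image flags for the "boot" kernel.
+ */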
+static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
+{
+ int error;
+
+ hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL);
+ if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) ||
+ !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) {
+ memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10);
+ memcpy(swsusp_header->sig, HIBERNATE_SIG, 10);
+ swsusp_header->image = handle->first_sector;
+ swsusp_header->flags = flags;
+ if (flags & SF_CRC32_MODE)
+ swsusp_header->crc32 = handle->crc32;
+ error = hib_bio_write_page(swsusp_resume_block,
+ swsusp_header, NULL);
+ } else {
+ printk(KERN_ERR "PM: Swap header not found!\n");
+ error = -ENODEV;
+ }
+ return error;
+}
+
+/**
+ * swsusp_swap_check - check if the resume device is a swap device
+ * and get its index (if so)
+ *
+ * This is called before saving the image.
+ */
+static int swsusp_swap_check(void)
+{
+ int res;
+
+ res = swap_type_of(swsusp_resume_device, swsusp_resume_block,
+ &hib_resume_bdev);
+ if (res < 0)
+ return res;
+
+ root_swap = res;
+ res = blkdev_get(hib_resume_bdev, FMODE_WRITE, NULL);
+ if (res)
+ return res;
+
+ res = set_blocksize(hib_resume_bdev, PAGE_SIZE);
+ if (res < 0)
+ blkdev_put(hib_resume_bdev, FMODE_WRITE);
+
+ return res;
+}
+
+/**
+ * write_page - Write one page to given swap location.
+ * @buf: Address we're writing.
+ * @offset: Offset of the swap page we're writing to.
+ * @bio_chain: Link the next write BIO here
+ */
+
+static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
+{
+ void *src;
+ int ret;
+
+ if (!offset)
+ return -ENOSPC;
+
+ if (bio_chain) {
+ src = (void *)__get_free_page(__GFP_WAIT | __GFP_NOWARN |
+ __GFP_NORETRY);
+ if (src) {
+ copy_page(src, buf);
+ } else {
+ ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */
+ if (ret)
+ return ret;
+ src = (void *)__get_free_page(__GFP_WAIT |
+ __GFP_NOWARN |
+ __GFP_NORETRY);
+ if (src) {
+ copy_page(src, buf);
+ } else {
+ WARN_ON_ONCE(1);
+ bio_chain = NULL; /* Go synchronous */
+ src = buf;
+ }
+ }
+ } else {
+ src = buf;
+ }
+ return hib_bio_write_page(offset, src, bio_chain);
+}
+
+static void release_swap_writer(struct swap_map_handle *handle)
+{
+ if (handle->cur)
+ free_page((unsigned long)handle->cur);
+ handle->cur = NULL;
+}
+
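+/*
+ * get_swap_writer - open the resume swap device, allocate the first
+ * (empty) swap map page together with the swap slot it will eventually be
+ * written to, and initialise the handle's bookkeeping.
+ */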
+static int get_swap_writer(struct swap_map_handle *handle)
+{
+ int ret;
+
+ ret = swsusp_swap_check();
+ if (ret) {
+ if (ret != -ENOSPC)
+ printk(KERN_ERR "PM: Cannot find swap device, try "
+ "swapon -a.\n");
+ return ret;
+ }
+ handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL);
+ if (!handle->cur) {
+ ret = -ENOMEM;
+ goto err_close;
+ }
+ handle->cur_swap = alloc_swapdev_block(root_swap);
+ if (!handle->cur_swap) {
+ ret = -ENOSPC;
+ goto err_rel;
+ }
+ handle->k = 0;
+ handle->reqd_free_pages = reqd_free_pages();
+ handle->first_sector = handle->cur_swap;
+ return 0;
+err_rel:
+ release_swap_writer(handle);
+err_close:
+ swsusp_close(FMODE_WRITE);
+ return ret;
+}
+
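+/*
+ * swap_write_page - write @buf to a freshly allocated swap page and record
+ * its sector in the current swap map page. When the map page fills up, it
+ * is chained to the next one and flushed to swap; with a bio_chain the
+ * writes are asynchronous and throttled against reqd_free_pages.
+ */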
+static int swap_write_page(struct swap_map_handle *handle, void *buf,
+ struct bio **bio_chain)
+{
+ int error = 0;
+ sector_t offset;
+
+ if (!handle->cur)
+ return -EINVAL;
+ offset = alloc_swapdev_block(root_swap);
+ error = write_page(buf, offset, bio_chain);
+ if (error)
+ return error;
+ handle->cur->entries[handle->k++] = offset;
+ if (handle->k >= MAP_PAGE_ENTRIES) {
+ offset = alloc_swapdev_block(root_swap);
+ if (!offset)
+ return -ENOSPC;
+ handle->cur->next_swap = offset;
+ error = write_page(handle->cur, handle->cur_swap, bio_chain);
+ if (error)
+ goto out;
+ clear_page(handle->cur);
+ handle->cur_swap = offset;
+ handle->k = 0;
+
+ if (bio_chain && low_free_pages() <= handle->reqd_free_pages) {
+ error = hib_wait_on_bio_chain(bio_chain);
+ if (error)
+ goto out;
+ /*
+ * Recalculate the number of required free pages, to
+ * make sure we never take more than half.
+ */
+ handle->reqd_free_pages = reqd_free_pages();
+ }
+ }
+ out:
+ return error;
+}
+
+static int flush_swap_writer(struct swap_map_handle *handle)
+{
+ if (handle->cur && handle->cur_swap)
+ return write_page(handle->cur, handle->cur_swap, NULL);
+ else
+ return -EINVAL;
+}
+
+static int swap_writer_finish(struct swap_map_handle *handle,
+ unsigned int flags, int error)
+{
+ if (!error) {
+ flush_swap_writer(handle);
+ printk(KERN_INFO "PM: S");
+ error = mark_swapfiles(handle, flags);
+ printk("|\n");
+ }
+
+ if (error)
+ free_all_swap_pages(root_swap);
+ release_swap_writer(handle);
+ swsusp_close(FMODE_WRITE);
+
+ return error;
+}
+
+/* We need to remember how much compressed data we need to read. */
+#define LZO_HEADER sizeof(size_t)
+
+/* Number of pages/bytes we'll compress at one time. */
+#define LZO_UNC_PAGES 32
+#define LZO_UNC_SIZE (LZO_UNC_PAGES * PAGE_SIZE)
+
+/* Number of pages/bytes we need for compressed data (worst case). */
+#define LZO_CMP_PAGES DIV_ROUND_UP(lzo1x_worst_compress(LZO_UNC_SIZE) + \
+ LZO_HEADER, PAGE_SIZE)
+#define LZO_CMP_SIZE (LZO_CMP_PAGES * PAGE_SIZE)
+
+/* Maximum number of threads for compression/decompression. */
+#define LZO_THREADS 3
+
+/* Minimum/maximum number of pages for read buffering. */
+#define LZO_MIN_RD_PAGES 1024
+#define LZO_MAX_RD_PAGES 8192
+
+
+/**
+ * save_image - save the suspend image data
+ */
+
+static int save_image(struct swap_map_handle *handle,
+ struct snapshot_handle *snapshot,
+ unsigned int nr_to_write)
+{
+ unsigned int m;
+ int ret;
+ int nr_pages;
+ int err2;
+ struct bio *bio;
+ ktime_t start;
+ ktime_t stop;
+
+ printk(KERN_INFO "PM: Saving image data pages (%u pages)...\n",
+ nr_to_write);
+ m = nr_to_write / 10;
+ if (!m)
+ m = 1;
+ nr_pages = 0;
+ bio = NULL;
+ start = ktime_get();
+ while (1) {
+ ret = snapshot_read_next(snapshot);
+ if (ret <= 0)
+ break;
+ ret = swap_write_page(handle, data_of(*snapshot), &bio);
+ if (ret)
+ break;
+ if (!(nr_pages % m))
+ printk(KERN_INFO "PM: Image saving progress: %3d%%\n",
+ nr_pages / m * 10);
+ nr_pages++;
+ }
+ err2 = hib_wait_on_bio_chain(&bio);
+ stop = ktime_get();
+ if (!ret)
+ ret = err2;
+ if (!ret)
+ printk(KERN_INFO "PM: Image saving done.\n");
+ swsusp_show_speed(start, stop, nr_to_write, "Wrote");
+ return ret;
+}
+
+/**
+ * Structure used for CRC32.
+ */
+struct crc_data {
+ struct task_struct *thr; /* thread */
+ atomic_t ready; /* ready to start flag */
+ atomic_t stop; /* ready to stop flag */
+ unsigned run_threads; /* nr current threads */
+ wait_queue_head_t go; /* start crc update */
+ wait_queue_head_t done; /* crc update done */
+ u32 *crc32; /* points to handle's crc32 */
+ size_t *unc_len[LZO_THREADS]; /* uncompressed lengths */
+ unsigned char *unc[LZO_THREADS]; /* uncompressed data */
+};
+
+/**
+ * CRC32 update function that runs in its own thread.
+ */
+static int crc32_threadfn(void *data)
+{
+ struct crc_data *d = data;
+ unsigned i;
+
+ while (1) {
+ wait_event(d->go, atomic_read(&d->ready) ||
+ kthread_should_stop());
+ if (kthread_should_stop()) {
+ d->thr = NULL;
+ atomic_set(&d->stop, 1);
+ wake_up(&d->done);
+ break;
+ }
+ atomic_set(&d->ready, 0);
+
+ for (i = 0; i < d->run_threads; i++)
+ *d->crc32 = crc32_le(*d->crc32,
+ d->unc[i], *d->unc_len[i]);
+ atomic_set(&d->stop, 1);
+ wake_up(&d->done);
+ }
+ return 0;
+}
+/**
+ * Structure used for LZO data compression.
+ */
+struct cmp_data {
+ struct task_struct *thr; /* thread */
+ atomic_t ready; /* ready to start flag */
+ atomic_t stop; /* ready to stop flag */
+ int ret; /* return code */
+ wait_queue_head_t go; /* start compression */
+ wait_queue_head_t done; /* compression done */
+ size_t unc_len; /* uncompressed length */
+ size_t cmp_len; /* compressed length */
+ unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */
+ unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */
+ unsigned char wrk[LZO1X_1_MEM_COMPRESS]; /* compression workspace */
+};
+
+/**
+ * Compression function that runs in its own thread.
+ */
+static int lzo_compress_threadfn(void *data)
+{
+ struct cmp_data *d = data;
+
+ while (1) {
+ wait_event(d->go, atomic_read(&d->ready) ||
+ kthread_should_stop());
+ if (kthread_should_stop()) {
+ d->thr = NULL;
+ d->ret = -1;
+ atomic_set(&d->stop, 1);
+ wake_up(&d->done);
+ break;
+ }
+ atomic_set(&d->ready, 0);
+
+ d->ret = lzo1x_1_compress(d->unc, d->unc_len,
+ d->cmp + LZO_HEADER, &d->cmp_len,
+ d->wrk);
+ atomic_set(&d->stop, 1);
+ wake_up(&d->done);
+ }
+ return 0;
+}
+
+/**
+ * save_image_lzo - Save the suspend image data compressed with LZO.
+ * @handle: Swap map handle to use for saving the image.
+ * @snapshot: Image to read data from.
+ * @nr_to_write: Number of pages to save.
+ */
+static int save_image_lzo(struct swap_map_handle *handle,
+ struct snapshot_handle *snapshot,
+ unsigned int nr_to_write)
+{
+ unsigned int m;
+ int ret = 0;
+ int nr_pages;
+ int err2;
+ struct bio *bio;
+ ktime_t start;
+ ktime_t stop;
+ size_t off;
+ unsigned thr, run_threads, nr_threads;
+ unsigned char *page = NULL;
+ struct cmp_data *data = NULL;
+ struct crc_data *crc = NULL;
+
+ /*
+ * We'll limit the number of threads for compression to limit memory
+ * footprint.
+ */
+ nr_threads = num_online_cpus() - 1;
+ nr_threads = clamp_val(nr_threads, 1, LZO_THREADS);
+
+ page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
+ if (!page) {
+ printk(KERN_ERR "PM: Failed to allocate LZO page\n");
+ ret = -ENOMEM;
+ goto out_clean;
+ }
+
+ data = vmalloc(sizeof(*data) * nr_threads);
+ if (!data) {
+ printk(KERN_ERR "PM: Failed to allocate LZO data\n");
+ ret = -ENOMEM;
+ goto out_clean;
+ }
+ for (thr = 0; thr < nr_threads; thr++)
+ memset(&data[thr], 0, offsetof(struct cmp_data, go));
+
+ crc = kmalloc(sizeof(*crc), GFP_KERNEL);
+ if (!crc) {
+ printk(KERN_ERR "PM: Failed to allocate crc\n");
+ ret = -ENOMEM;
+ goto out_clean;
+ }
+ memset(crc, 0, offsetof(struct crc_data, go));
+
+ /*
+ * Start the compression threads.
+ */
+ for (thr = 0; thr < nr_threads; thr++) {
+ init_waitqueue_head(&data[thr].go);
+ init_waitqueue_head(&data[thr].done);
+
+ data[thr].thr = kthread_run(lzo_compress_threadfn,
+ &data[thr],
+ "image_compress/%u", thr);
+ if (IS_ERR(data[thr].thr)) {
+ data[thr].thr = NULL;
+ printk(KERN_ERR
+ "PM: Cannot start compression threads\n");
+ ret = -ENOMEM;
+ goto out_clean;
+ }
+ }
+
+ /*
+ * Start the CRC32 thread.
+ */
+ init_waitqueue_head(&crc->go);
+ init_waitqueue_head(&crc->done);
+
+ handle->crc32 = 0;
+ crc->crc32 = &handle->crc32;
+ for (thr = 0; thr < nr_threads; thr++) {
+ crc->unc[thr] = data[thr].unc;
+ crc->unc_len[thr] = &data[thr].unc_len;
+ }
+
+ crc->thr = kthread_run(crc32_threadfn, crc, "image_crc32");
+ if (IS_ERR(crc->thr)) {
+ crc->thr = NULL;
+ printk(KERN_ERR "PM: Cannot start CRC32 thread\n");
+ ret = -ENOMEM;
+ goto out_clean;
+ }
+
+ /*
+ * Adjust the number of required free pages after all allocations have
+ * been done. We don't want to run out of pages when writing.
+ */
+ handle->reqd_free_pages = reqd_free_pages();
+
+ printk(KERN_INFO
+ "PM: Using %u thread(s) for compression.\n"
+ "PM: Compressing and saving image data (%u pages)...\n",
+ nr_threads, nr_to_write);
+ m = nr_to_write / 10;
+ if (!m)
+ m = 1;
+ nr_pages = 0;
+ bio = NULL;
+ start = ktime_get();
+ for (;;) {
+ for (thr = 0; thr < nr_threads; thr++) {
+ for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) {
+ ret = snapshot_read_next(snapshot);
+ if (ret < 0)
+ goto out_finish;
+
+ if (!ret)
+ break;
+
+ memcpy(data[thr].unc + off,
+ data_of(*snapshot), PAGE_SIZE);
+
+ if (!(nr_pages % m))
+ printk(KERN_INFO
+ "PM: Image saving progress: "
+ "%3d%%\n",
+ nr_pages / m * 10);
+ nr_pages++;
+ }
+ if (!off)
+ break;
+
+ data[thr].unc_len = off;
+
+ atomic_set(&data[thr].ready, 1);
+ wake_up(&data[thr].go);
+ }
+
+ if (!thr)
+ break;
+
+ crc->run_threads = thr;
+ atomic_set(&crc->ready, 1);
+ wake_up(&crc->go);
+
+ for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
+ wait_event(data[thr].done,
+ atomic_read(&data[thr].stop));
+ atomic_set(&data[thr].stop, 0);
+
+ ret = data[thr].ret;
+
+ if (ret < 0) {
+ printk(KERN_ERR "PM: LZO compression failed\n");
+ goto out_finish;
+ }
+
+ if (unlikely(!data[thr].cmp_len ||
+ data[thr].cmp_len >
+ lzo1x_worst_compress(data[thr].unc_len))) {
+ printk(KERN_ERR
+ "PM: Invalid LZO compressed length\n");
+ ret = -1;
+ goto out_finish;
+ }
+
+ *(size_t *)data[thr].cmp = data[thr].cmp_len;
+
+ /*
+ * Given we are writing one page at a time to disk, we
+ * copy that much from the buffer, although the last
+ * bit will likely be smaller than a full page. This is
+ * OK - we saved the length of the compressed data, so
+ * any garbage at the end will be discarded when we
+ * read it.
+ */
+ for (off = 0;
+ off < LZO_HEADER + data[thr].cmp_len;
+ off += PAGE_SIZE) {
+ memcpy(page, data[thr].cmp + off, PAGE_SIZE);
+
+ ret = swap_write_page(handle, page, &bio);
+ if (ret)
+ goto out_finish;
+ }
+ }
+
+ wait_event(crc->done, atomic_read(&crc->stop));
+ atomic_set(&crc->stop, 0);
+ }
+
+out_finish:
+ err2 = hib_wait_on_bio_chain(&bio);
+ stop = ktime_get();
+ if (!ret)
+ ret = err2;
+ if (!ret)
+ printk(KERN_INFO "PM: Image saving done.\n");
+ swsusp_show_speed(start, stop, nr_to_write, "Wrote");
+out_clean:
+ if (crc) {
+ if (crc->thr)
+ kthread_stop(crc->thr);
+ kfree(crc);
+ }
+ if (data) {
+ for (thr = 0; thr < nr_threads; thr++)
+ if (data[thr].thr)
+ kthread_stop(data[thr].thr);
+ vfree(data);
+ }
+ if (page)
+ free_page((unsigned long)page);
+
+ return ret;
+}
+
+/**
+ * enough_swap - Make sure we have enough swap to save the image.
+ *
+ * Returns TRUE or FALSE after checking the total amount of swap
+ * space available from the resume partition.
+ */
+
+static int enough_swap(unsigned int nr_pages, unsigned int flags)
+{
+ unsigned int free_swap = count_swap_pages(root_swap, 1);
+ unsigned int required;
+
+ pr_debug("PM: Free swap pages: %u\n", free_swap);
+
+ required = PAGES_FOR_IO + nr_pages;
+ return free_swap > required;
+}
+
+/**
+ * swsusp_write - Write entire image and metadata.
+ * @flags: flags to pass to the "boot" kernel in the image header
+ *
+ * It is important _NOT_ to unmount filesystems at this point. We want
+ * them synced (in case something goes wrong) but we DO NOT want to mark
+ * the filesystems clean: they are not. (And it does not matter; if we
+ * resume correctly, we'll mark the system clean anyway.)
+ */
+
+int swsusp_write(unsigned int flags)
+{
+ struct swap_map_handle handle;
+ struct snapshot_handle snapshot;
+ struct swsusp_info *header;
+ unsigned long pages;
+ int error;
+
+ pages = snapshot_get_image_size();
+ error = get_swap_writer(&handle);
+ if (error) {
+ printk(KERN_ERR "PM: Cannot get swap writer\n");
+ return error;
+ }
+ if (flags & SF_NOCOMPRESS_MODE) {
+ if (!enough_swap(pages, flags)) {
+ printk(KERN_ERR "PM: Not enough free swap\n");
+ error = -ENOSPC;
+ goto out_finish;
+ }
+ }
+ memset(&snapshot, 0, sizeof(struct snapshot_handle));
+ error = snapshot_read_next(&snapshot);
+ if (error < PAGE_SIZE) {
+ if (error >= 0)
+ error = -EFAULT;
+
+ goto out_finish;
+ }
+ header = (struct swsusp_info *)data_of(snapshot);
+ error = swap_write_page(&handle, header, NULL);
+ if (!error) {
+ error = (flags & SF_NOCOMPRESS_MODE) ?
+ save_image(&handle, &snapshot, pages - 1) :
+ save_image_lzo(&handle, &snapshot, pages - 1);
+ }
+out_finish:
+ error = swap_writer_finish(&handle, flags, error);
+ return error;
+}
+
+/**
+ * The following functions allow us to read data using a swap map
+ * in a file-like way
+ */
+
+static void release_swap_reader(struct swap_map_handle *handle)
+{
+ struct swap_map_page_list *tmp;
+
+ while (handle->maps) {
+ if (handle->maps->map)
+ free_page((unsigned long)handle->maps->map);
+ tmp = handle->maps;
+ handle->maps = handle->maps->next;
+ kfree(tmp);
+ }
+ handle->cur = NULL;
+}
+
+static int get_swap_reader(struct swap_map_handle *handle,
+ unsigned int *flags_p)
+{
+ int error;
+ struct swap_map_page_list *tmp, *last;
+ sector_t offset;
+
+ *flags_p = swsusp_header->flags;
+
+ if (!swsusp_header->image) /* how can this happen? */
+ return -EINVAL;
+
+ handle->cur = NULL;
+ last = handle->maps = NULL;
+ offset = swsusp_header->image;
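+ /*
+ * Walk the on-disk chain of swap map pages, reading each into memory;
+ * every map page records the sector of the next one in next_swap.
+ */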
+ while (offset) {
+ tmp = kmalloc(sizeof(*handle->maps), GFP_KERNEL);
+ if (!tmp) {
+ release_swap_reader(handle);
+ return -ENOMEM;
+ }
+ memset(tmp, 0, sizeof(*tmp));
+ if (!handle->maps)
+ handle->maps = tmp;
+ if (last)
+ last->next = tmp;
+ last = tmp;
+
+ tmp->map = (struct swap_map_page *)
+ __get_free_page(__GFP_WAIT | __GFP_HIGH);
+ if (!tmp->map) {
+ release_swap_reader(handle);
+ return -ENOMEM;
+ }
+
+ error = hib_bio_read_page(offset, tmp->map, NULL);
+ if (error) {
+ release_swap_reader(handle);
+ return error;
+ }
+ offset = tmp->map->next_swap;
+ }
+ handle->k = 0;
+ handle->cur = handle->maps->map;
+ return 0;
+}
+
+static int swap_read_page(struct swap_map_handle *handle, void *buf,
+ struct bio **bio_chain)
+{
+ sector_t offset;
+ int error;
+ struct swap_map_page_list *tmp;
+
+ if (!handle->cur)
+ return -EINVAL;
+ offset = handle->cur->entries[handle->k];
+ if (!offset)
+ return -EFAULT;
+ error = hib_bio_read_page(offset, buf, bio_chain);
+ if (error)
+ return error;
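+ /*
+ * Once all entries in the current map page are consumed, free it and
+ * move on to the next map page in the chain.
+ */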
+ if (++handle->k >= MAP_PAGE_ENTRIES) {
+ handle->k = 0;
+ free_page((unsigned long)handle->maps->map);
+ tmp = handle->maps;
+ handle->maps = handle->maps->next;
+ kfree(tmp);
+ if (!handle->maps)
+ release_swap_reader(handle);
+ else
+ handle->cur = handle->maps->map;
+ }
+ return error;
+}
+
+static int swap_reader_finish(struct swap_map_handle *handle)
+{
+ release_swap_reader(handle);
+
+ return 0;
+}
+
+/**
+ * load_image - Load the image using the swap map handle @handle
+ * and the snapshot handle @snapshot.
+ * (There are @nr_to_read pages to load.)
+ */
+
+static int load_image(struct swap_map_handle *handle,
+ struct snapshot_handle *snapshot,
+ unsigned int nr_to_read)
+{
+ unsigned int m;
+ int ret = 0;
+ ktime_t start;
+ ktime_t stop;
+ struct bio *bio;
+ int err2;
+ unsigned nr_pages;
+
+ printk(KERN_INFO "PM: Loading image data pages (%u pages)...\n",
+ nr_to_read);
+ m = nr_to_read / 10;
+ if (!m)
+ m = 1;
+ nr_pages = 0;
+ bio = NULL;
+ start = ktime_get();
+ for ( ; ; ) {
+ ret = snapshot_write_next(snapshot);
+ if (ret <= 0)
+ break;
+ ret = swap_read_page(handle, data_of(*snapshot), &bio);
+ if (ret)
+ break;
+ if (snapshot->sync_read)
+ ret = hib_wait_on_bio_chain(&bio);
+ if (ret)
+ break;
+ if (!(nr_pages % m))
+ printk(KERN_INFO "PM: Image loading progress: %3d%%\n",
+ nr_pages / m * 10);
+ nr_pages++;
+ }
+ err2 = hib_wait_on_bio_chain(&bio);
+ stop = ktime_get();
+ if (!ret)
+ ret = err2;
+ if (!ret) {
+ printk(KERN_INFO "PM: Image loading done.\n");
+ snapshot_write_finalize(snapshot);
+ if (!snapshot_image_loaded(snapshot))
+ ret = -ENODATA;
+ }
+ swsusp_show_speed(start, stop, nr_to_read, "Read");
+ return ret;
+}
+
+/**
+ * Structure used for LZO data decompression.
+ */
+struct dec_data {
+ struct task_struct *thr; /* thread */
+ atomic_t ready; /* ready to start flag */
+ atomic_t stop; /* ready to stop flag */
+ int ret; /* return code */
+ wait_queue_head_t go; /* start decompression */
+ wait_queue_head_t done; /* decompression done */
+ size_t unc_len; /* uncompressed length */
+ size_t cmp_len; /* compressed length */
+ unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */
+ unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */
+};
+
+/**
+ * Decompression function that runs in its own thread.
+ */
+static int lzo_decompress_threadfn(void *data)
+{
+ struct dec_data *d = data;
+
+ while (1) {
+ wait_event(d->go, atomic_read(&d->ready) ||
+ kthread_should_stop());
+ if (kthread_should_stop()) {
+ d->thr = NULL;
+ d->ret = -1;
+ atomic_set(&d->stop, 1);
+ wake_up(&d->done);
+ break;
+ }
+ atomic_set(&d->ready, 0);
+
+ d->unc_len = LZO_UNC_SIZE;
+ d->ret = lzo1x_decompress_safe(d->cmp + LZO_HEADER, d->cmp_len,
+ d->unc, &d->unc_len);
+ atomic_set(&d->stop, 1);
+ wake_up(&d->done);
+ }
+ return 0;
+}
+
+/**
+ * load_image_lzo - Load compressed image data and decompress them with LZO.
+ * @handle: Swap map handle to use for loading data.
+ * @snapshot: Image to copy uncompressed data into.
+ * @nr_to_read: Number of pages to load.
+ */
+static int load_image_lzo(struct swap_map_handle *handle,
+ struct snapshot_handle *snapshot,
+ unsigned int nr_to_read)
+{
+ unsigned int m;
+ int ret = 0;
+ int eof = 0;
+ struct bio *bio;
+ ktime_t start;
+ ktime_t stop;
+ unsigned nr_pages;
+ size_t off;
+ unsigned i, thr, run_threads, nr_threads;
+ unsigned ring = 0, pg = 0, ring_size = 0,
+ have = 0, want, need, asked = 0;
+ unsigned long read_pages = 0;
+ unsigned char **page = NULL;
+ struct dec_data *data = NULL;
+ struct crc_data *crc = NULL;
+
+ /*
+ * We'll cap the number of decompression threads to limit the memory
+ * footprint.
+ */
+ nr_threads = num_online_cpus() - 1;
+ nr_threads = clamp_val(nr_threads, 1, LZO_THREADS);
+
+ page = vmalloc(sizeof(*page) * LZO_MAX_RD_PAGES);
+ if (!page) {
+ printk(KERN_ERR "PM: Failed to allocate LZO page\n");
+ ret = -ENOMEM;
+ goto out_clean;
+ }
+
+ data = vmalloc(sizeof(*data) * nr_threads);
+ if (!data) {
+ printk(KERN_ERR "PM: Failed to allocate LZO data\n");
+ ret = -ENOMEM;
+ goto out_clean;
+ }
+ for (thr = 0; thr < nr_threads; thr++)
+ memset(&data[thr], 0, offsetof(struct dec_data, go));
+
+ crc = kmalloc(sizeof(*crc), GFP_KERNEL);
+ if (!crc) {
+ printk(KERN_ERR "PM: Failed to allocate crc\n");
+ ret = -ENOMEM;
+ goto out_clean;
+ }
+ memset(crc, 0, offsetof(struct crc_data, go));
+
+ /*
+ * Start the decompression threads.
+ */
+ for (thr = 0; thr < nr_threads; thr++) {
+ init_waitqueue_head(&data[thr].go);
+ init_waitqueue_head(&data[thr].done);
+
+ data[thr].thr = kthread_run(lzo_decompress_threadfn,
+ &data[thr],
+ "image_decompress/%u", thr);
+ if (IS_ERR(data[thr].thr)) {
+ data[thr].thr = NULL;
+ printk(KERN_ERR
+ "PM: Cannot start decompression threads\n");
+ ret = -ENOMEM;
+ goto out_clean;
+ }
+ }
+
+ /*
+ * Start the CRC32 thread.
+ */
+ init_waitqueue_head(&crc->go);
+ init_waitqueue_head(&crc->done);
+
+ handle->crc32 = 0;
+ crc->crc32 = &handle->crc32;
+ for (thr = 0; thr < nr_threads; thr++) {
+ crc->unc[thr] = data[thr].unc;
+ crc->unc_len[thr] = &data[thr].unc_len;
+ }
+
+ crc->thr = kthread_run(crc32_threadfn, crc, "image_crc32");
+ if (IS_ERR(crc->thr)) {
+ crc->thr = NULL;
+ printk(KERN_ERR "PM: Cannot start CRC32 thread\n");
+ ret = -ENOMEM;
+ goto out_clean;
+ }
+
+ /*
+ * Set the number of pages for read buffering.
+ * This is complete guesswork, because we'll only know the real
+ * picture once prepare_image() is called, which is much later on
+ * during the image load phase. We'll assume the worst case and
+ * say that none of the image pages are from high memory.
+ */
+ if (low_free_pages() > snapshot_get_image_size())
+ read_pages = (low_free_pages() - snapshot_get_image_size()) / 2;
+ read_pages = clamp_val(read_pages, LZO_MIN_RD_PAGES, LZO_MAX_RD_PAGES);
+
+ for (i = 0; i < read_pages; i++) {
+ page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ?
+ __GFP_WAIT | __GFP_HIGH :
+ __GFP_WAIT | __GFP_NOWARN |
+ __GFP_NORETRY);
+
+ if (!page[i]) {
+ if (i < LZO_CMP_PAGES) {
+ ring_size = i;
+ printk(KERN_ERR
+ "PM: Failed to allocate LZO pages\n");
+ ret = -ENOMEM;
+ goto out_clean;
+ } else {
+ break;
+ }
+ }
+ }
+ want = ring_size = i;
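+ /*
+ * Ring buffer bookkeeping: 'want' pages still to be submitted for
+ * reading, 'asked' reads in flight, 'have' pages read but not yet
+ * consumed; 'ring' is the submit index and 'pg' the consume index.
+ */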
+
+ printk(KERN_INFO
+ "PM: Using %u thread(s) for decompression.\n"
+ "PM: Loading and decompressing image data (%u pages)...\n",
+ nr_threads, nr_to_read);
+ m = nr_to_read / 10;
+ if (!m)
+ m = 1;
+ nr_pages = 0;
+ bio = NULL;
+ start = ktime_get();
+
+ ret = snapshot_write_next(snapshot);
+ if (ret <= 0)
+ goto out_finish;
+
+ for (;;) {
+ for (i = 0; !eof && i < want; i++) {
+ ret = swap_read_page(handle, page[ring], &bio);
+ if (ret) {
+ /*
+ * On real read error, finish. On end of data,
+ * set EOF flag and just exit the read loop.
+ */
+ if (handle->cur &&
+ handle->cur->entries[handle->k]) {
+ goto out_finish;
+ } else {
+ eof = 1;
+ break;
+ }
+ }
+ if (++ring >= ring_size)
+ ring = 0;
+ }
+ asked += i;
+ want -= i;
+
+ /*
+ * We are out of data, wait for some more.
+ */
+ if (!have) {
+ if (!asked)
+ break;
+
+ ret = hib_wait_on_bio_chain(&bio);
+ if (ret)
+ goto out_finish;
+ have += asked;
+ asked = 0;
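+ /* eof == 2: no more data on disk and all outstanding reads are in. */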
+ if (eof)
+ eof = 2;
+ }
+
+ if (crc->run_threads) {
+ wait_event(crc->done, atomic_read(&crc->stop));
+ atomic_set(&crc->stop, 0);
+ crc->run_threads = 0;
+ }
+
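+ /*
+ * Hand buffered pages to the decompression threads: read the stored
+ * compressed length, check that enough pages are buffered, copy them
+ * into the thread's buffer and wake it up.
+ */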
+ for (thr = 0; have && thr < nr_threads; thr++) {
+ data[thr].cmp_len = *(size_t *)page[pg];
+ if (unlikely(!data[thr].cmp_len ||
+ data[thr].cmp_len >
+ lzo1x_worst_compress(LZO_UNC_SIZE))) {
+ printk(KERN_ERR
+ "PM: Invalid LZO compressed length\n");
+ ret = -1;
+ goto out_finish;
+ }
+
+ need = DIV_ROUND_UP(data[thr].cmp_len + LZO_HEADER,
+ PAGE_SIZE);
+ if (need > have) {
+ if (eof > 1) {
+ ret = -1;
+ goto out_finish;
+ }
+ break;
+ }
+
+ for (off = 0;
+ off < LZO_HEADER + data[thr].cmp_len;
+ off += PAGE_SIZE) {
+ memcpy(data[thr].cmp + off,
+ page[pg], PAGE_SIZE);
+ have--;
+ want++;
+ if (++pg >= ring_size)
+ pg = 0;
+ }
+
+ atomic_set(&data[thr].ready, 1);
+ wake_up(&data[thr].go);
+ }
+
+ /*
+ * Wait for more data while we are decompressing.
+ */
+ if (have < LZO_CMP_PAGES && asked) {
+ ret = hib_wait_on_bio_chain(&bio);
+ if (ret)
+ goto out_finish;
+ have += asked;
+ asked = 0;
+ if (eof)
+ eof = 2;
+ }
+
+ for (run_threads = thr, thr = 0; thr < run_threads; thr++) {
+ wait_event(data[thr].done,
+ atomic_read(&data[thr].stop));
+ atomic_set(&data[thr].stop, 0);
+
+ ret = data[thr].ret;
+
+ if (ret < 0) {
+ printk(KERN_ERR
+ "PM: LZO decompression failed\n");
+ goto out_finish;
+ }
+
+ if (unlikely(!data[thr].unc_len ||
+ data[thr].unc_len > LZO_UNC_SIZE ||
+ data[thr].unc_len & (PAGE_SIZE - 1))) {
+ printk(KERN_ERR
+ "PM: Invalid LZO uncompressed length\n");
+ ret = -1;
+ goto out_finish;
+ }
+
+ for (off = 0;
+ off < data[thr].unc_len; off += PAGE_SIZE) {
+ memcpy(data_of(*snapshot),
+ data[thr].unc + off, PAGE_SIZE);
+
+ if (!(nr_pages % m))
+ printk(KERN_INFO
+ "PM: Image loading progress: "
+ "%3d%%\n",
+ nr_pages / m * 10);
+ nr_pages++;
+
+ ret = snapshot_write_next(snapshot);
+ if (ret <= 0) {
+ crc->run_threads = thr + 1;
+ atomic_set(&crc->ready, 1);
+ wake_up(&crc->go);
+ goto out_finish;
+ }
+ }
+ }
+
+ crc->run_threads = thr;
+ atomic_set(&crc->ready, 1);
+ wake_up(&crc->go);
+ }
+
+out_finish:
+ if (crc->run_threads) {
+ wait_event(crc->done, atomic_read(&crc->stop));
+ atomic_set(&crc->stop, 0);
+ }
+ stop = ktime_get();
+ if (!ret) {
+ printk(KERN_INFO "PM: Image loading done.\n");
+ snapshot_write_finalize(snapshot);
+ if (!snapshot_image_loaded(snapshot))
+ ret = -ENODATA;
+ if (!ret) {
+ if (swsusp_header->flags & SF_CRC32_MODE) {
+ if (handle->crc32 != swsusp_header->crc32) {
+ printk(KERN_ERR
+ "PM: Invalid image CRC32!\n");
+ ret = -ENODATA;
+ }
+ }
+ }
+ }
+ swsusp_show_speed(start, stop, nr_to_read, "Read");
+out_clean:
+ for (i = 0; i < ring_size; i++)
+ free_page((unsigned long)page[i]);
+ if (crc) {
+ if (crc->thr)
+ kthread_stop(crc->thr);
+ kfree(crc);
+ }
+ if (data) {
+ for (thr = 0; thr < nr_threads; thr++)
+ if (data[thr].thr)
+ kthread_stop(data[thr].thr);
+ vfree(data);
+ }
+ vfree(page);
+
+ return ret;
+}
+
+/**
+ * swsusp_read - read the hibernation image.
+ * @flags_p: Location into which the flags passed by the "frozen" kernel
+ * in the image header are written.
+ */
+
+int swsusp_read(unsigned int *flags_p)
+{
+ int error;
+ struct swap_map_handle handle;
+ struct snapshot_handle snapshot;
+ struct swsusp_info *header;
+
+ memset(&snapshot, 0, sizeof(struct snapshot_handle));
+ error = snapshot_write_next(&snapshot);
+ if (error < PAGE_SIZE)
+ return error < 0 ? error : -EFAULT;
+ header = (struct swsusp_info *)data_of(snapshot);
+ error = get_swap_reader(&handle, flags_p);
+ if (error)
+ goto end;
+ if (!error)
+ error = swap_read_page(&handle, header, NULL);
+ if (!error) {
+ error = (*flags_p & SF_NOCOMPRESS_MODE) ?
+ load_image(&handle, &snapshot, header->pages - 1) :
+ load_image_lzo(&handle, &snapshot, header->pages - 1);
+ }
+ swap_reader_finish(&handle);
+end:
+ if (!error)
+ pr_debug("PM: Image successfully loaded\n");
+ else
+ pr_debug("PM: Error %d resuming\n", error);
+ return error;
+}
+
+/**
+ * swsusp_check - Check for swsusp signature in the resume device
+ */
+
+int swsusp_check(void)
+{
+ int error;
+
+ hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device,
+ FMODE_READ, NULL);
+ if (!IS_ERR(hib_resume_bdev)) {
+ set_blocksize(hib_resume_bdev, PAGE_SIZE);
+ clear_page(swsusp_header);
+ error = hib_bio_read_page(swsusp_resume_block,
+ swsusp_header, NULL);
+ if (error)
+ goto put;
+
+ if (!memcmp(HIBERNATE_SIG, swsusp_header->sig, 10)) {
+ memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10);
+ /* Reset swap signature now */
+ error = hib_bio_write_page(swsusp_resume_block,
+ swsusp_header, NULL);
+ } else {
+ error = -EINVAL;
+ }
+
+put:
+ if (error)
+ blkdev_put(hib_resume_bdev, FMODE_READ);
+ else
+ pr_debug("PM: Image signature found, resuming\n");
+ } else {
+ error = PTR_ERR(hib_resume_bdev);
+ }
+
+ if (error)
+ pr_debug("PM: Image not found (code %d)\n", error);
+
+ return error;
+}
+
+/**
+ * swsusp_close - close swap device.
+ */
+
+void swsusp_close(fmode_t mode)
+{
+ if (IS_ERR(hib_resume_bdev)) {
+ pr_debug("PM: Image device not initialised\n");
+ return;
+ }
+
+ blkdev_put(hib_resume_bdev, mode);
+}
+
+/**
+ * swsusp_unmark - Unmark swsusp signature in the resume device
+ */
+
+#ifdef CONFIG_SUSPEND
+int swsusp_unmark(void)
+{
+ int error;
+
+ hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL);
+ if (!memcmp(HIBERNATE_SIG, swsusp_header->sig, 10)) {
+ memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10);
+ error = hib_bio_write_page(swsusp_resume_block,
+ swsusp_header, NULL);
+ } else {
+ printk(KERN_ERR "PM: Cannot find swsusp signature!\n");
+ error = -ENODEV;
+ }
+
+ /*
+ * We just returned from suspend, we don't need the image any more.
+ */
+ free_all_swap_pages(root_swap);
+
+ return error;
+}
+#endif
+
+static int swsusp_header_init(void)
+{
+ swsusp_header = (struct swsusp_header*) __get_free_page(GFP_KERNEL);
+ if (!swsusp_header)
+ panic("Could not allocate memory for swsusp_header\n");
+ return 0;
+}
+
+core_initcall(swsusp_header_init);
diff --git a/kernel/power/tuxonice.h b/kernel/power/tuxonice.h
new file mode 100644
index 000000000..1aff98026
--- /dev/null
+++ b/kernel/power/tuxonice.h
@@ -0,0 +1,260 @@
+/*
+ * kernel/power/tuxonice.h
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * It contains declarations used throughout swsusp.
+ *
+ */
+
+#ifndef KERNEL_POWER_TOI_H
+#define KERNEL_POWER_TOI_H
+
+#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/suspend.h>
+#include <linux/fs.h>
+#include <asm/setup.h>
+#include "tuxonice_pageflags.h"
+#include "power.h"
+
+#define TOI_CORE_VERSION "3.3"
+#define TOI_HEADER_VERSION 3
+#define MY_BOOT_KERNEL_DATA_VERSION 4
+
+struct toi_boot_kernel_data {
+ int version;
+ int size;
+ unsigned long toi_action;
+ unsigned long toi_debug_state;
+ u32 toi_default_console_level;
+ int toi_io_time[2][2];
+ char toi_nosave_commandline[COMMAND_LINE_SIZE];
+ unsigned long pages_used[33];
+ unsigned long incremental_bytes_in;
+ unsigned long incremental_bytes_out;
+ unsigned long compress_bytes_in;
+ unsigned long compress_bytes_out;
+ unsigned long pruned_pages;
+};
+
+extern struct toi_boot_kernel_data toi_bkd;
+
+/* Location of the boot kernel data struct in the kernel being resumed */
+extern unsigned long boot_kernel_data_buffer;
+
+/* == Action states == */
+
+enum {
+ TOI_REBOOT,
+ TOI_PAUSE,
+ TOI_LOGALL,
+ TOI_CAN_CANCEL,
+ TOI_KEEP_IMAGE,
+ TOI_FREEZER_TEST,
+ TOI_SINGLESTEP,
+ TOI_PAUSE_NEAR_PAGESET_END,
+ TOI_TEST_FILTER_SPEED,
+ TOI_TEST_BIO,
+ TOI_NO_PAGESET2,
+ TOI_IGNORE_ROOTFS,
+ TOI_REPLACE_SWSUSP,
+ TOI_PAGESET2_FULL,
+ TOI_ABORT_ON_RESAVE_NEEDED,
+ TOI_NO_MULTITHREADED_IO,
+ TOI_NO_DIRECT_LOAD, /* Obsolete */
+ TOI_LATE_CPU_HOTPLUG, /* Obsolete */
+ TOI_GET_MAX_MEM_ALLOCD,
+ TOI_NO_FLUSHER_THREAD,
+ TOI_NO_PS2_IF_UNNEEDED,
+ TOI_POST_RESUME_BREAKPOINT,
+ TOI_NO_READAHEAD,
+ TOI_TRACE_DEBUG_ON,
+ TOI_INCREMENTAL_IMAGE,
+};
+
+extern unsigned long toi_bootflags_mask;
+
+#define clear_action_state(bit) (test_and_clear_bit(bit, &toi_bkd.toi_action))
+
+/* == Result states == */
+
+enum {
+ TOI_ABORTED,
+ TOI_ABORT_REQUESTED,
+ TOI_NOSTORAGE_AVAILABLE,
+ TOI_INSUFFICIENT_STORAGE,
+ TOI_FREEZING_FAILED,
+ TOI_KEPT_IMAGE,
+ TOI_WOULD_EAT_MEMORY,
+ TOI_UNABLE_TO_FREE_ENOUGH_MEMORY,
+ TOI_PM_SEM,
+ TOI_DEVICE_REFUSED,
+ TOI_SYSDEV_REFUSED,
+ TOI_EXTRA_PAGES_ALLOW_TOO_SMALL,
+ TOI_UNABLE_TO_PREPARE_IMAGE,
+ TOI_FAILED_MODULE_INIT,
+ TOI_FAILED_MODULE_CLEANUP,
+ TOI_FAILED_IO,
+ TOI_OUT_OF_MEMORY,
+ TOI_IMAGE_ERROR,
+ TOI_PLATFORM_PREP_FAILED,
+ TOI_CPU_HOTPLUG_FAILED,
+ TOI_ARCH_PREPARE_FAILED, /* Removed Linux-3.0 */
+ TOI_RESAVE_NEEDED,
+ TOI_CANT_SUSPEND,
+ TOI_NOTIFIERS_PREPARE_FAILED,
+ TOI_PRE_SNAPSHOT_FAILED,
+ TOI_PRE_RESTORE_FAILED,
+ TOI_USERMODE_HELPERS_ERR,
+ TOI_CANT_USE_ALT_RESUME,
+ TOI_HEADER_TOO_BIG,
+ TOI_WAKEUP_EVENT,
+ TOI_SYSCORE_REFUSED,
+ TOI_DPM_PREPARE_FAILED,
+ TOI_DPM_SUSPEND_FAILED,
+ TOI_NUM_RESULT_STATES /* Used in printing debug info only */
+};
+
+extern unsigned long toi_result;
+
+#define set_result_state(bit) (test_and_set_bit(bit, &toi_result))
+#define set_abort_result(bit) (test_and_set_bit(TOI_ABORTED, &toi_result), \
+ test_and_set_bit(bit, &toi_result))
+#define clear_result_state(bit) (test_and_clear_bit(bit, &toi_result))
+#define test_result_state(bit) (test_bit(bit, &toi_result))
+
+/* == Debug sections and levels == */
+
+/* debugging levels. */
+enum {
+ TOI_STATUS = 0,
+ TOI_ERROR = 2,
+ TOI_LOW,
+ TOI_MEDIUM,
+ TOI_HIGH,
+ TOI_VERBOSE,
+};
+
+enum {
+ TOI_ANY_SECTION,
+ TOI_EAT_MEMORY,
+ TOI_IO,
+ TOI_HEADER,
+ TOI_WRITER,
+ TOI_MEMORY,
+ TOI_PAGEDIR,
+ TOI_COMPRESS,
+ TOI_BIO,
+};
+
+#define set_debug_state(bit) (test_and_set_bit(bit, &toi_bkd.toi_debug_state))
+#define clear_debug_state(bit) \
+ (test_and_clear_bit(bit, &toi_bkd.toi_debug_state))
+#define test_debug_state(bit) (test_bit(bit, &toi_bkd.toi_debug_state))
+
+/* == Steps in hibernating == */
+
+enum {
+ STEP_HIBERNATE_PREPARE_IMAGE,
+ STEP_HIBERNATE_SAVE_IMAGE,
+ STEP_HIBERNATE_POWERDOWN,
+ STEP_RESUME_CAN_RESUME,
+ STEP_RESUME_LOAD_PS1,
+ STEP_RESUME_DO_RESTORE,
+ STEP_RESUME_READ_PS2,
+ STEP_RESUME_GO,
+ STEP_RESUME_ALT_IMAGE,
+ STEP_CLEANUP,
+ STEP_QUIET_CLEANUP
+};
+
+/* == TuxOnIce states ==
+ (see also include/linux/suspend.h) */
+
+#define get_toi_state() (toi_state)
+#define restore_toi_state(saved_state) \
+ do { toi_state = saved_state; } while (0)
+
+/* == Module support == */
+
+struct toi_core_fns {
+ int (*post_context_save)(void);
+ unsigned long (*get_nonconflicting_page)(void);
+ int (*try_hibernate)(void);
+ void (*try_resume)(void);
+};
+
+extern struct toi_core_fns *toi_core_fns;
+
+/* == All else == */
+#define KB(x) ((x) << (PAGE_SHIFT - 10))
+#define MB(x) ((x) >> (20 - PAGE_SHIFT))
+
+extern int toi_start_anything(int toi_or_resume);
+extern void toi_finish_anything(int toi_or_resume);
+
+extern int save_image_part1(void);
+extern int toi_atomic_restore(void);
+
+extern int toi_try_hibernate(void);
+extern void toi_try_resume(void);
+
+extern int __toi_post_context_save(void);
+
+extern unsigned int nr_hibernates;
+extern char alt_resume_param[256];
+
+extern void copyback_post(void);
+extern int toi_hibernate(void);
+extern unsigned long extra_pd1_pages_used;
+
+#define SECTOR_SIZE 512
+
+extern void toi_early_boot_message(int can_erase_image, int default_answer,
+ char *warning_reason, ...);
+
+extern int do_check_can_resume(void);
+extern int do_toi_step(int step);
+extern int toi_launch_userspace_program(char *command, int channel_no,
+ int wait, int debug);
+
+extern char tuxonice_signature[9];
+
+extern int toi_start_other_threads(void);
+extern void toi_stop_other_threads(void);
+
+extern int toi_trace_index;
+#define TOI_TRACE_DEBUG(PFN, DESC, ...) \
+ do { \
+ if (test_action_state(TOI_TRACE_DEBUG_ON)) { \
+ printk("*TOI* %ld %02d" DESC "\n", PFN, toi_trace_index, ##__VA_ARGS__); \
+ } \
+ } while(0)
+
+#ifdef CONFIG_TOI_KEEP_IMAGE
+#define toi_keeping_image (test_action_state(TOI_KEEP_IMAGE) || test_action_state(TOI_INCREMENTAL_IMAGE))
+#else
+#define toi_keeping_image (0)
+#endif
+
+#ifdef CONFIG_TOI_INCREMENTAL
+extern void toi_reset_dirtiness_one(unsigned long pfn, int verbose);
+extern int toi_reset_dirtiness(int verbose);
+extern void toi_cbw_write(void);
+extern void toi_cbw_restore(void);
+extern int toi_allocate_cbw_data(void);
+extern void toi_free_cbw_data(void);
+extern int toi_cbw_init(void);
+extern void toi_mark_tasks_cbw(void);
+#else
+static inline int toi_reset_dirtiness(int verbose) { return 0; }
+#define toi_cbw_write() do { } while(0)
+#define toi_cbw_restore() do { } while(0)
+#define toi_allocate_cbw_data() do { } while(0)
+#define toi_free_cbw_data() do { } while(0)
+static inline int toi_cbw_init(void) { return 0; }
+#endif
+#endif
diff --git a/kernel/power/tuxonice_alloc.c b/kernel/power/tuxonice_alloc.c
new file mode 100644
index 000000000..5729240d8
--- /dev/null
+++ b/kernel/power/tuxonice_alloc.c
@@ -0,0 +1,308 @@
+/*
+ * kernel/power/tuxonice_alloc.c
+ *
+ * Copyright (C) 2008-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/slab.h>
+#include "tuxonice_modules.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice.h"
+
+#define TOI_ALLOC_PATHS 41
+
+static DEFINE_MUTEX(toi_alloc_mutex);
+
+static struct toi_module_ops toi_alloc_ops;
+
+static int toi_fail_num;
+
+static atomic_t toi_alloc_count[TOI_ALLOC_PATHS],
+ toi_free_count[TOI_ALLOC_PATHS],
+ toi_test_count[TOI_ALLOC_PATHS],
+ toi_fail_count[TOI_ALLOC_PATHS];
+static int toi_cur_allocd[TOI_ALLOC_PATHS], toi_max_allocd[TOI_ALLOC_PATHS];
+static int cur_allocd, max_allocd;
+
+static char *toi_alloc_desc[TOI_ALLOC_PATHS] = {
+ "", /* 0 */
+ "get_io_info_struct",
+ "extent",
+ "extent (loading chain)",
+ "userui channel",
+ "userui arg", /* 5 */
+ "attention list metadata",
+ "extra pagedir memory metadata",
+ "bdev metadata",
+ "extra pagedir memory",
+ "header_locations_read", /* 10 */
+ "bio queue",
+ "prepare_readahead",
+ "i/o buffer",
+ "writer buffer in bio_init",
+ "checksum buffer", /* 15 */
+ "compression buffer",
+ "filewriter signature op",
+ "set resume param alloc1",
+ "set resume param alloc2",
+ "debugging info buffer", /* 20 */
+ "check can resume buffer",
+ "write module config buffer",
+ "read module config buffer",
+ "write image header buffer",
+ "read pageset1 buffer", /* 25 */
+ "get_have_image_data buffer",
+ "checksum page",
+ "worker rw loop",
+ "get nonconflicting page",
+ "ps1 load addresses", /* 30 */
+ "remove swap image",
+ "swap image exists",
+ "swap parse sig location",
+ "sysfs kobj",
+ "swap mark resume attempted buffer", /* 35 */
+ "cluster member",
+ "boot kernel data buffer",
+ "setting swap signature",
+ "block i/o bdev struct",
+ "copy before write", /* 40 */
+};
+
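+/*
+ * Fault-injection hook: when the sysfs "failure_test" knob matches an
+ * allocation path's number, the next allocation on that path fails.
+ */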
+#define MIGHT_FAIL(FAIL_NUM, FAIL_VAL) \
+ do { \
+ BUG_ON(FAIL_NUM >= TOI_ALLOC_PATHS); \
+ \
+ if (FAIL_NUM == toi_fail_num) { \
+ atomic_inc(&toi_test_count[FAIL_NUM]); \
+ toi_fail_num = 0; \
+ return FAIL_VAL; \
+ } \
+ } while (0)
+
+static void alloc_update_stats(int fail_num, void *result, int size)
+{
+ if (!result) {
+ atomic_inc(&toi_fail_count[fail_num]);
+ return;
+ }
+
+ atomic_inc(&toi_alloc_count[fail_num]);
+ if (unlikely(test_action_state(TOI_GET_MAX_MEM_ALLOCD))) {
+ mutex_lock(&toi_alloc_mutex);
+ toi_cur_allocd[fail_num]++;
+ cur_allocd += size;
+ if (unlikely(cur_allocd > max_allocd)) {
+ int i;
+
+ for (i = 0; i < TOI_ALLOC_PATHS; i++)
+ toi_max_allocd[i] = toi_cur_allocd[i];
+ max_allocd = cur_allocd;
+ }
+ mutex_unlock(&toi_alloc_mutex);
+ }
+}
+
+static void free_update_stats(int fail_num, int size)
+{
+ BUG_ON(fail_num >= TOI_ALLOC_PATHS);
+ atomic_inc(&toi_free_count[fail_num]);
+ if (unlikely(atomic_read(&toi_free_count[fail_num]) >
+ atomic_read(&toi_alloc_count[fail_num])))
+ dump_stack();
+ if (unlikely(test_action_state(TOI_GET_MAX_MEM_ALLOCD))) {
+ mutex_lock(&toi_alloc_mutex);
+ cur_allocd -= size;
+ toi_cur_allocd[fail_num]--;
+ mutex_unlock(&toi_alloc_mutex);
+ }
+}
+
+void *toi_kzalloc(int fail_num, size_t size, gfp_t flags)
+{
+ void *result;
+
+ if (toi_alloc_ops.enabled)
+ MIGHT_FAIL(fail_num, NULL);
+ result = kzalloc(size, flags);
+ if (toi_alloc_ops.enabled)
+ alloc_update_stats(fail_num, result, size);
+ if (fail_num == toi_trace_allocs)
+ dump_stack();
+ return result;
+}
+
+unsigned long toi_get_free_pages(int fail_num, gfp_t mask,
+ unsigned int order)
+{
+ unsigned long result;
+
+ mask |= ___GFP_TOI_NOTRACK;
+ if (toi_alloc_ops.enabled)
+ MIGHT_FAIL(fail_num, 0);
+ result = __get_free_pages(mask, order);
+ if (toi_alloc_ops.enabled)
+ alloc_update_stats(fail_num, (void *) result,
+ PAGE_SIZE << order);
+ if (fail_num == toi_trace_allocs)
+ dump_stack();
+ return result;
+}
+
+struct page *toi_alloc_page(int fail_num, gfp_t mask)
+{
+ struct page *result;
+
+ if (toi_alloc_ops.enabled)
+ MIGHT_FAIL(fail_num, NULL);
+ mask |= ___GFP_TOI_NOTRACK;
+ result = alloc_page(mask);
+ if (toi_alloc_ops.enabled)
+ alloc_update_stats(fail_num, (void *) result, PAGE_SIZE);
+ if (fail_num == toi_trace_allocs)
+ dump_stack();
+ return result;
+}
+
+unsigned long toi_get_zeroed_page(int fail_num, gfp_t mask)
+{
+ unsigned long result;
+
+ if (toi_alloc_ops.enabled)
+ MIGHT_FAIL(fail_num, 0);
+ mask |= ___GFP_TOI_NOTRACK;
+ result = get_zeroed_page(mask);
+ if (toi_alloc_ops.enabled)
+ alloc_update_stats(fail_num, (void *) result, PAGE_SIZE);
+ if (fail_num == toi_trace_allocs)
+ dump_stack();
+ return result;
+}
+
+void toi_kfree(int fail_num, const void *arg, int size)
+{
+ if (arg && toi_alloc_ops.enabled)
+ free_update_stats(fail_num, size);
+
+ if (fail_num == toi_trace_allocs)
+ dump_stack();
+ kfree(arg);
+}
+
+void toi_free_page(int fail_num, unsigned long virt)
+{
+ if (virt && toi_alloc_ops.enabled)
+ free_update_stats(fail_num, PAGE_SIZE);
+
+ if (fail_num == toi_trace_allocs)
+ dump_stack();
+ free_page(virt);
+}
+
+void toi__free_page(int fail_num, struct page *page)
+{
+ if (page && toi_alloc_ops.enabled)
+ free_update_stats(fail_num, PAGE_SIZE);
+
+ if (fail_num == toi_trace_allocs)
+ dump_stack();
+ __free_page(page);
+}
+
+void toi_free_pages(int fail_num, struct page *page, int order)
+{
+ if (page && toi_alloc_ops.enabled)
+ free_update_stats(fail_num, PAGE_SIZE << order);
+
+ if (fail_num == toi_trace_allocs)
+ dump_stack();
+ __free_pages(page, order);
+}
+
+void toi_alloc_print_debug_stats(void)
+{
+ int i, header_done = 0;
+
+ if (!toi_alloc_ops.enabled)
+ return;
+
+ for (i = 0; i < TOI_ALLOC_PATHS; i++)
+ if (atomic_read(&toi_alloc_count[i]) !=
+ atomic_read(&toi_free_count[i])) {
+ if (!header_done) {
+ printk(KERN_INFO "Idx Allocs Frees Tests "
+ " Fails Max Description\n");
+ header_done = 1;
+ }
+
+ printk(KERN_INFO "%3d %7d %7d %7d %7d %7d %s\n", i,
+ atomic_read(&toi_alloc_count[i]),
+ atomic_read(&toi_free_count[i]),
+ atomic_read(&toi_test_count[i]),
+ atomic_read(&toi_fail_count[i]),
+ toi_max_allocd[i],
+ toi_alloc_desc[i]);
+ }
+}
+
+static int toi_alloc_initialise(int starting_cycle)
+{
+ int i;
+
+ if (!starting_cycle)
+ return 0;
+
+ if (toi_trace_allocs)
+ dump_stack();
+
+ for (i = 0; i < TOI_ALLOC_PATHS; i++) {
+ atomic_set(&toi_alloc_count[i], 0);
+ atomic_set(&toi_free_count[i], 0);
+ atomic_set(&toi_test_count[i], 0);
+ atomic_set(&toi_fail_count[i], 0);
+ toi_cur_allocd[i] = 0;
+ toi_max_allocd[i] = 0;
+ }
+
+ max_allocd = 0;
+ cur_allocd = 0;
+ return 0;
+}
+
+static struct toi_sysfs_data sysfs_params[] = {
+ SYSFS_INT("failure_test", SYSFS_RW, &toi_fail_num, 0, 99, 0, NULL),
+ SYSFS_INT("trace", SYSFS_RW, &toi_trace_allocs, 0, TOI_ALLOC_PATHS, 0,
+ NULL),
+ SYSFS_BIT("find_max_mem_allocated", SYSFS_RW, &toi_bkd.toi_action,
+ TOI_GET_MAX_MEM_ALLOCD, 0),
+ SYSFS_INT("enabled", SYSFS_RW, &toi_alloc_ops.enabled, 0, 1, 0,
+ NULL)
+};
+
+static struct toi_module_ops toi_alloc_ops = {
+ .type = MISC_HIDDEN_MODULE,
+ .name = "allocation debugging",
+ .directory = "alloc",
+ .module = THIS_MODULE,
+ .early = 1,
+ .initialise = toi_alloc_initialise,
+
+ .sysfs_data = sysfs_params,
+ .num_sysfs_entries = sizeof(sysfs_params) /
+ sizeof(struct toi_sysfs_data),
+};
+
+int toi_alloc_init(void)
+{
+ int result = toi_register_module(&toi_alloc_ops);
+ return result;
+}
+
+void toi_alloc_exit(void)
+{
+ toi_unregister_module(&toi_alloc_ops);
+}
diff --git a/kernel/power/tuxonice_alloc.h b/kernel/power/tuxonice_alloc.h
new file mode 100644
index 000000000..28c5af193
--- /dev/null
+++ b/kernel/power/tuxonice_alloc.h
@@ -0,0 +1,54 @@
+/*
+ * kernel/power/tuxonice_alloc.h
+ *
+ * Copyright (C) 2008-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ */
+
+#include <linux/slab.h>
+#define TOI_WAIT_GFP (GFP_NOFS | __GFP_NOWARN)
+#define TOI_ATOMIC_GFP (GFP_ATOMIC | __GFP_NOWARN)
+
+#ifdef CONFIG_PM_DEBUG
+extern void *toi_kzalloc(int fail_num, size_t size, gfp_t flags);
+extern void toi_kfree(int fail_num, const void *arg, int size);
+
+extern unsigned long toi_get_free_pages(int fail_num, gfp_t mask,
+ unsigned int order);
+#define toi_get_free_page(FAIL_NUM, MASK) toi_get_free_pages(FAIL_NUM, MASK, 0)
+extern unsigned long toi_get_zeroed_page(int fail_num, gfp_t mask);
+extern void toi_free_page(int fail_num, unsigned long buf);
+extern void toi__free_page(int fail_num, struct page *page);
+extern void toi_free_pages(int fail_num, struct page *page, int order);
+extern struct page *toi_alloc_page(int fail_num, gfp_t mask);
+extern int toi_alloc_init(void);
+extern void toi_alloc_exit(void);
+
+extern void toi_alloc_print_debug_stats(void);
+
+#else /* CONFIG_PM_DEBUG */
+
+#define toi_kzalloc(FAIL, SIZE, FLAGS) (kzalloc(SIZE, FLAGS))
+#define toi_kfree(FAIL, ALLOCN, SIZE) (kfree(ALLOCN))
+
+#define toi_get_free_pages(FAIL, FLAGS, ORDER) __get_free_pages(FLAGS, ORDER)
+#define toi_get_free_page(FAIL, FLAGS) __get_free_page(FLAGS)
+#define toi_get_zeroed_page(FAIL, FLAGS) get_zeroed_page(FLAGS)
+#define toi_free_page(FAIL, ALLOCN) do { free_page(ALLOCN); } while (0)
+#define toi__free_page(FAIL, PAGE) __free_page(PAGE)
+#define toi_free_pages(FAIL, PAGE, ORDER) __free_pages(PAGE, ORDER)
+#define toi_alloc_page(FAIL, MASK) alloc_page(MASK)
+static inline int toi_alloc_init(void)
+{
+ return 0;
+}
+
+static inline void toi_alloc_exit(void) { }
+
+static inline void toi_alloc_print_debug_stats(void) { }
+
+#endif
+
+extern int toi_trace_allocs;
diff --git a/kernel/power/tuxonice_atomic_copy.c b/kernel/power/tuxonice_atomic_copy.c
new file mode 100644
index 000000000..7b9886f54
--- /dev/null
+++ b/kernel/power/tuxonice_atomic_copy.c
@@ -0,0 +1,469 @@
+/*
+ * kernel/power/tuxonice_atomic_copy.c
+ *
+ * Copyright 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ * Routines for doing the atomic save/restore.
+ */
+
+#include <linux/suspend.h>
+#include <linux/highmem.h>
+#include <linux/cpu.h>
+#include <linux/freezer.h>
+#include <linux/console.h>
+#include <linux/syscore_ops.h>
+#include <linux/ftrace.h>
+#include <asm/suspend.h>
+#include "tuxonice.h"
+#include "tuxonice_storage.h"
+#include "tuxonice_power_off.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_io.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice_pageflags.h"
+#include "tuxonice_checksum.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_atomic_copy.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_modules.h"
+
+unsigned long extra_pd1_pages_used;
+
+/**
+ * free_pbe_list - free page backup entries used by the atomic copy code.
+ * @list: List to free.
+ * @highmem: Whether the list is in highmem.
+ *
+ * Normally, this function isn't used. If, however, we need to abort before
+ * doing the atomic copy, we use this to free the pbes previously allocated.
+ **/
+static void free_pbe_list(struct pbe **list, int highmem)
+{
+ while (*list) {
+ int i;
+ struct pbe *free_pbe, *next_page = NULL;
+ struct page *page;
+
+ if (highmem) {
+ page = (struct page *) *list;
+ free_pbe = (struct pbe *) kmap(page);
+ } else {
+ page = virt_to_page(*list);
+ free_pbe = *list;
+ }
+
+ for (i = 0; i < PBES_PER_PAGE; i++) {
+ if (!free_pbe)
+ break;
+ if (highmem)
+ toi__free_page(29, free_pbe->address);
+ else
+ toi_free_page(29,
+ (unsigned long) free_pbe->address);
+ free_pbe = free_pbe->next;
+ }
+
+ if (highmem) {
+ if (free_pbe)
+ next_page = free_pbe;
+ kunmap(page);
+ } else {
+ if (free_pbe)
+ next_page = free_pbe;
+ }
+
+ toi__free_page(29, page);
+ *list = (struct pbe *) next_page;
+ }
+}
+
+/**
+ * copyback_post - post atomic-restore actions
+ *
+ * After doing the atomic restore, we have a few more things to do:
+ * 1) We want to retain some values across the restore, so we now copy
+ * these from the nosave variables to the normal ones.
+ * 2) Set the status flags.
+ * 3) Resume devices.
+ * 4) Tell userui so it can redraw & restore settings.
+ * 5) Reread the page cache.
+ **/
+void copyback_post(void)
+{
+ struct toi_boot_kernel_data *bkd =
+ (struct toi_boot_kernel_data *) boot_kernel_data_buffer;
+
+ if (toi_activate_storage(1))
+ panic("Failed to reactivate our storage.");
+
+ toi_post_atomic_restore_modules(bkd);
+
+ toi_cond_pause(1, "About to reload secondary pagedir.");
+
+ if (read_pageset2(0))
+ panic("Unable to successfully reread the page cache.");
+
+ /*
+ * If the user wants to sleep again after resuming from full-off,
+ * it's most likely to be in order to suspend to ram, so we'll
+ * do this check after loading pageset2, to give them the fastest
+ * wakeup when they are ready to use the computer again.
+ */
+ toi_check_resleep();
+
+ if (test_action_state(TOI_INCREMENTAL_IMAGE))
+ toi_reset_dirtiness(1);
+}
+
+/**
+ * toi_copy_pageset1 - do the atomic copy of pageset1
+ *
+ * Make the atomic copy of pageset1. We can't use copy_page (as we once did)
+ * because we can't be sure what side effects it has. On my old Duron, with
+ * 3DNOW, kernel_fpu_begin increments preempt count, making our preempt
+ * count at resume time 4 instead of 3.
+ *
+ * We don't want to call kmap_atomic unconditionally because it has the side
+ * effect of incrementing the preempt count, which will leave it one too high
+ * post resume (the page containing the preempt count will be copied after
+ * it is incremented). This is essentially the same problem.
+ **/
+void toi_copy_pageset1(void)
+{
+ int i;
+ unsigned long source_index, dest_index;
+
+ memory_bm_position_reset(pageset1_map);
+ memory_bm_position_reset(pageset1_copy_map);
+
+ source_index = memory_bm_next_pfn(pageset1_map, 0);
+ dest_index = memory_bm_next_pfn(pageset1_copy_map, 0);
+
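+ /*
+ * The two bitmaps are walked in lockstep, so the i-th page of
+ * pageset1 is copied into the i-th page reserved for its copy.
+ */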
+ for (i = 0; i < pagedir1.size; i++) {
+ unsigned long *origvirt, *copyvirt;
+ struct page *origpage, *copypage;
+ int loop = (PAGE_SIZE / sizeof(unsigned long)) - 1,
+ was_present1, was_present2;
+
+ origpage = pfn_to_page(source_index);
+ copypage = pfn_to_page(dest_index);
+
+ origvirt = PageHighMem(origpage) ?
+ kmap_atomic(origpage) :
+ page_address(origpage);
+
+ copyvirt = PageHighMem(copypage) ?
+ kmap_atomic(copypage) :
+ page_address(copypage);
+
+ was_present1 = kernel_page_present(origpage);
+ if (!was_present1)
+ kernel_map_pages(origpage, 1, 1);
+
+ was_present2 = kernel_page_present(copypage);
+ if (!was_present2)
+ kernel_map_pages(copypage, 1, 1);
+
+ while (loop >= 0) {
+ *(copyvirt + loop) = *(origvirt + loop);
+ loop--;
+ }
+
+ if (!was_present1)
+ kernel_map_pages(origpage, 1, 0);
+
+ if (!was_present2)
+ kernel_map_pages(copypage, 1, 0);
+
+ if (PageHighMem(origpage))
+ kunmap_atomic(origvirt);
+
+ if (PageHighMem(copypage))
+ kunmap_atomic(copyvirt);
+
+ source_index = memory_bm_next_pfn(pageset1_map, 0);
+ dest_index = memory_bm_next_pfn(pageset1_copy_map, 0);
+ }
+}
+
+/**
+ * __toi_post_context_save - steps after saving the cpu context
+ *
+ * Steps taken after saving the CPU state to make the actual
+ * atomic copy.
+ *
+ * Called from swsusp_save in snapshot.c via toi_post_context_save.
+ **/
+int __toi_post_context_save(void)
+{
+ unsigned long old_ps1_size = pagedir1.size;
+
+ check_checksums();
+
+ free_checksum_pages();
+
+ toi_recalculate_image_contents(1);
+
+ extra_pd1_pages_used = pagedir1.size > old_ps1_size ?
+ pagedir1.size - old_ps1_size : 0;
+
+ if (extra_pd1_pages_used > extra_pd1_pages_allowance) {
+ printk(KERN_INFO "Pageset1 has grown by %lu pages. "
+ "extra_pages_allowance is currently only %lu.\n",
+ pagedir1.size - old_ps1_size,
+ extra_pd1_pages_allowance);
+
+ /*
+ * Highlevel code will see this, clear the state and
+ * retry if we haven't already done so twice.
+ */
+ if (any_to_free(1)) {
+ set_abort_result(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL);
+ return 1;
+ }
+ if (try_allocate_extra_memory()) {
+ printk(KERN_INFO "Failed to allocate the extra memory"
+ " needed. Restarting the process.");
+ set_abort_result(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL);
+ return 1;
+ }
+ printk(KERN_INFO "However it looks like there's enough"
+ " free ram and storage to handle this, so "
+ " continuing anyway.");
+ /*
+ * What if try_allocate_extra_memory above calls
+ * toi_allocate_extra_pagedir_memory and it allocs a new
+ * slab page via toi_kzalloc which should be in ps1? So...
+ */
+ toi_recalculate_image_contents(1);
+ }
+
+ if (!test_action_state(TOI_TEST_FILTER_SPEED) &&
+ !test_action_state(TOI_TEST_BIO))
+ toi_copy_pageset1();
+
+ return 0;
+}
+
+/**
+ * toi_hibernate - high level code for doing the atomic copy
+ *
+ * High-level code which prepares to do the atomic copy. Loosely based
+ * on the swsusp version, but with the following twists:
+ * - We set toi_running so the swsusp code uses our code paths.
+ * - We give better feedback regarding what goes wrong if there is a
+ * problem.
+ * - We use an extra function to call the assembly, just in case this code
+ * is in a module (return address).
+ **/
+int toi_hibernate(void)
+{
+ int error;
+
+ error = toi_lowlevel_builtin();
+
+ if (!error) {
+ struct toi_boot_kernel_data *bkd =
+ (struct toi_boot_kernel_data *) boot_kernel_data_buffer;
+
+ /*
+ * The boot kernel's data may be larger (newer version) or
+ * smaller (older version) than ours. Copy the minimum
+ * of the two sizes, so that we don't overwrite valid values
+ * from pre-atomic copy.
+ */
+
+ memcpy(&toi_bkd, (char *) boot_kernel_data_buffer,
+ min_t(int, sizeof(struct toi_boot_kernel_data),
+ bkd->size));
+ }
+
+ return error;
+}
+
+/**
+ * toi_atomic_restore - prepare to do the atomic restore
+ *
+ * Get ready to do the atomic restore. This part gets us into the same
+ * state we are in prior to calling do_toi_lowlevel while
+ * hibernating: hot-unplugging secondary CPUs and freezing processes,
+ * before starting the thread that will do the restore.
+ **/
+int toi_atomic_restore(void)
+{
+ int error;
+
+ toi_prepare_status(DONT_CLEAR_BAR, "Atomic restore.");
+
+ memcpy(&toi_bkd.toi_nosave_commandline, saved_command_line,
+ strlen(saved_command_line));
+
+ toi_pre_atomic_restore_modules(&toi_bkd);
+
+ if (add_boot_kernel_data_pbe())
+ goto Failed;
+
+ toi_prepare_status(DONT_CLEAR_BAR, "Doing atomic copy/restore.");
+
+ if (toi_go_atomic(PMSG_QUIESCE, 0))
+ goto Failed;
+
+ /* We'll ignore saved state, but this gets preempt count (etc) right */
+ save_processor_state();
+
+ error = swsusp_arch_resume();
+ /*
+ * Code below is only ever reached in case of failure. Otherwise
+ * execution continues at place where swsusp_arch_suspend was called.
+ *
+ * We don't know whether it's safe to continue (this shouldn't happen),
+ * so lets err on the side of caution.
+ */
+ BUG();
+
+Failed:
+ free_pbe_list(&restore_pblist, 0);
+#ifdef CONFIG_HIGHMEM
+ free_pbe_list(&restore_highmem_pblist, 1);
+#endif
+ return 1;
+}
+
+/**
+ * toi_go_atomic - do the actual atomic copy/restore
+ * @state: The state to use for dpm_suspend_start & power_down calls.
+ * @suspend_time: Whether we're suspending or resuming.
+ **/
+int toi_go_atomic(pm_message_t state, int suspend_time)
+{
+ if (suspend_time) {
+ if (platform_begin(1)) {
+ set_abort_result(TOI_PLATFORM_PREP_FAILED);
+ toi_end_atomic(ATOMIC_STEP_PLATFORM_END, suspend_time, 3);
+ return 1;
+ }
+
+ if (dpm_prepare(PMSG_FREEZE)) {
+ set_abort_result(TOI_DPM_PREPARE_FAILED);
+ dpm_complete(PMSG_RECOVER);
+ toi_end_atomic(ATOMIC_STEP_PLATFORM_END, suspend_time, 3);
+ return 1;
+ }
+ }
+
+ suspend_console();
+ pm_restrict_gfp_mask();
+
+ if (suspend_time) {
+ if (dpm_suspend(state)) {
+ set_abort_result(TOI_DPM_SUSPEND_FAILED);
+ toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 3);
+ return 1;
+ }
+ } else {
+ if (dpm_suspend_start(state)) {
+ set_abort_result(TOI_DPM_SUSPEND_FAILED);
+ toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 3);
+ return 1;
+ }
+ }
+
+ /* At this point, dpm_suspend_start() has been called, but *not*
+ * dpm_suspend_end(). We *must* call dpm_suspend_end() now.
+ * Otherwise, drivers for some devices (e.g. interrupt controllers)
+ * become desynchronized with the actual state of the hardware
+ * at resume time, and evil weirdness ensues.
+ */
+
+ if (dpm_suspend_end(state)) {
+ set_abort_result(TOI_DEVICE_REFUSED);
+ toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 1);
+ return 1;
+ }
+
+ if (suspend_time) {
+ if (platform_pre_snapshot(1))
+ set_abort_result(TOI_PRE_SNAPSHOT_FAILED);
+ } else {
+ if (platform_pre_restore(1))
+ set_abort_result(TOI_PRE_RESTORE_FAILED);
+ }
+
+ if (test_result_state(TOI_ABORTED)) {
+ toi_end_atomic(ATOMIC_STEP_PLATFORM_FINISH, suspend_time, 1);
+ return 1;
+ }
+
+ if (disable_nonboot_cpus()) {
+ set_abort_result(TOI_CPU_HOTPLUG_FAILED);
+ toi_end_atomic(ATOMIC_STEP_CPU_HOTPLUG,
+ suspend_time, 1);
+ return 1;
+ }
+
+ local_irq_disable();
+
+ if (syscore_suspend()) {
+ set_abort_result(TOI_SYSCORE_REFUSED);
+ toi_end_atomic(ATOMIC_STEP_IRQS, suspend_time, 1);
+ return 1;
+ }
+
+ if (suspend_time && pm_wakeup_pending()) {
+ set_abort_result(TOI_WAKEUP_EVENT);
+ toi_end_atomic(ATOMIC_STEP_SYSCORE_RESUME, suspend_time, 1);
+ return 1;
+ }
+ return 0;
+}
+
+/**
+ * toi_end_atomic - post atomic copy/restore routines
+ * @stage: What step to start at.
+ * @suspend_time: Whether we're suspending or resuming.
+ * @error: Whether we're recovering from an error.
+ **/
+void toi_end_atomic(int stage, int suspend_time, int error)
+{
+ pm_message_t msg = suspend_time ? (error ? PMSG_RECOVER : PMSG_THAW) :
+ PMSG_RESTORE;
+
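+ /*
+ * Each case deliberately falls through to the cases below it, so
+ * entering at a given stage runs that teardown step and all the
+ * remaining ones.
+ */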
+ switch (stage) {
+ case ATOMIC_ALL_STEPS:
+ if (!suspend_time) {
+ events_check_enabled = false;
+ }
+ platform_leave(1);
+ case ATOMIC_STEP_SYSCORE_RESUME:
+ syscore_resume();
+ case ATOMIC_STEP_IRQS:
+ local_irq_enable();
+ case ATOMIC_STEP_CPU_HOTPLUG:
+ enable_nonboot_cpus();
+ case ATOMIC_STEP_PLATFORM_FINISH:
+ if (!suspend_time && error & 2)
+ platform_restore_cleanup(1);
+ else
+ platform_finish(1);
+ dpm_resume_start(msg);
+ case ATOMIC_STEP_DEVICE_RESUME:
+ if (suspend_time && (error & 2))
+ platform_recover(1);
+ dpm_resume(msg);
+ if (!toi_in_suspend()) {
+ dpm_resume_end(PMSG_RECOVER);
+ }
+ if (error || !toi_in_suspend()) {
+ pm_restore_gfp_mask();
+ }
+ resume_console();
+ case ATOMIC_STEP_DPM_COMPLETE:
+ dpm_complete(msg);
+ case ATOMIC_STEP_PLATFORM_END:
+ platform_end(1);
+
+ toi_prepare_status(DONT_CLEAR_BAR, "Post atomic.");
+ }
+}
diff --git a/kernel/power/tuxonice_atomic_copy.h b/kernel/power/tuxonice_atomic_copy.h
new file mode 100644
index 000000000..2de0e3b49
--- /dev/null
+++ b/kernel/power/tuxonice_atomic_copy.h
@@ -0,0 +1,25 @@
+/*
+ * kernel/power/tuxonice_atomic_copy.h
+ *
+ * Copyright 2008-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ * Routines for doing the atomic save/restore.
+ */
+
+enum {
+ ATOMIC_ALL_STEPS,
+ ATOMIC_STEP_SYSCORE_RESUME,
+ ATOMIC_STEP_IRQS,
+ ATOMIC_STEP_CPU_HOTPLUG,
+ ATOMIC_STEP_PLATFORM_FINISH,
+ ATOMIC_STEP_DEVICE_RESUME,
+ ATOMIC_STEP_DPM_COMPLETE,
+ ATOMIC_STEP_PLATFORM_END,
+};
+
+int toi_go_atomic(pm_message_t state, int toi_time);
+void toi_end_atomic(int stage, int toi_time, int error);
+
+extern void platform_recover(int platform_mode);
diff --git a/kernel/power/tuxonice_bio.h b/kernel/power/tuxonice_bio.h
new file mode 100644
index 000000000..201e3cd47
--- /dev/null
+++ b/kernel/power/tuxonice_bio.h
@@ -0,0 +1,78 @@
+/*
+ * kernel/power/tuxonice_bio.h
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ * This file contains declarations for functions exported from
+ * tuxonice_bio.c, which contains low level io functions.
+ */
+
+#include <linux/buffer_head.h>
+#include "tuxonice_extent.h"
+
+void toi_put_extent_chain(struct hibernate_extent_chain *chain);
+int toi_add_to_extent_chain(struct hibernate_extent_chain *chain,
+ unsigned long start, unsigned long end);
+
+struct hibernate_extent_saved_state {
+ int extent_num;
+ struct hibernate_extent *extent_ptr;
+ unsigned long offset;
+};
+
+struct toi_bdev_info {
+ struct toi_bdev_info *next;
+ struct hibernate_extent_chain blocks;
+ struct block_device *bdev;
+ struct toi_module_ops *allocator;
+ int allocator_index;
+ struct hibernate_extent_chain allocations;
+ char name[266]; /* "swap on " or "file " + up to 256 chars */
+
+ /* Saved in header */
+ char uuid[17];
+ dev_t dev_t;
+ int prio;
+ int bmap_shift;
+ int blocks_per_page;
+ unsigned long pages_used;
+ struct hibernate_extent_saved_state saved_state[4];
+};
+
+struct toi_extent_iterate_state {
+ struct toi_bdev_info *current_chain;
+ int num_chains;
+ int saved_chain_number[4];
+ struct toi_bdev_info *saved_chain_ptr[4];
+};
+
+/*
+ * Our exported interface so the swapwriter and filewriter don't
+ * need these functions duplicated.
+ */
+struct toi_bio_ops {
+ int (*bdev_page_io) (int rw, struct block_device *bdev, long pos,
+ struct page *page);
+ int (*register_storage)(struct toi_bdev_info *new);
+ void (*free_storage)(void);
+};
+
+struct toi_allocator_ops {
+ unsigned long (*toi_swap_storage_available) (void);
+};
+
+extern struct toi_bio_ops toi_bio_ops;
+
+extern char *toi_writer_buffer;
+extern int toi_writer_buffer_posn;
+
+struct toi_bio_allocator_ops {
+ int (*register_storage) (void);
+ unsigned long (*storage_available)(void);
+ int (*allocate_storage) (struct toi_bdev_info *, unsigned long);
+ int (*bmap) (struct toi_bdev_info *);
+ void (*free_storage) (struct toi_bdev_info *);
+ unsigned long (*free_unused_storage) (struct toi_bdev_info *, unsigned long used);
+};
diff --git a/kernel/power/tuxonice_bio_chains.c b/kernel/power/tuxonice_bio_chains.c
new file mode 100644
index 000000000..364fae9db
--- /dev/null
+++ b/kernel/power/tuxonice_bio_chains.c
@@ -0,0 +1,1126 @@
+/*
+ * kernel/power/tuxonice_bio_chains.c
+ *
+ * Copyright (C) 2009-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ */
+
+#include <linux/mm_types.h>
+#include "tuxonice_bio.h"
+#include "tuxonice_bio_internal.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_ui.h"
+#include "tuxonice.h"
+#include "tuxonice_io.h"
+
+static struct toi_bdev_info *prio_chain_head;
+static int num_chains;
+
+/* Pointer to current entry being loaded/saved. */
+struct toi_extent_iterate_state toi_writer_posn;
+
+#define metadata_size (sizeof(struct toi_bdev_info) - \
+ offsetof(struct toi_bdev_info, uuid))
+
+/*
+ * After section 0 (header) comes 2 => next_section[0] = 2
+ */
+static int next_section[3] = { 2, 3, 1 };
+
+/**
+ * dump_block_chains - print the contents of the bdev info array.
+ **/
+void dump_block_chains(void)
+{
+ int i = 0;
+ int j;
+ struct toi_bdev_info *cur_chain = prio_chain_head;
+
+ while (cur_chain) {
+ struct hibernate_extent *this = cur_chain->blocks.first;
+
+ printk(KERN_DEBUG "Chain %d (prio %d):", i, cur_chain->prio);
+
+ while (this) {
+ printk(KERN_CONT " [%lu-%lu]%s", this->start,
+ this->end, this->next ? "," : "");
+ this = this->next;
+ }
+
+ printk("\n");
+ cur_chain = cur_chain->next;
+ i++;
+ }
+
+ printk(KERN_DEBUG "Saved states:\n");
+ for (i = 0; i < 4; i++) {
+ printk(KERN_DEBUG "Slot %d: Chain %d.\n",
+ i, toi_writer_posn.saved_chain_number[i]);
+
+ cur_chain = prio_chain_head;
+ j = 0;
+ while (cur_chain) {
+ printk(KERN_DEBUG " Chain %d: Extent %d. Offset %lu.\n",
+ j, cur_chain->saved_state[i].extent_num,
+ cur_chain->saved_state[i].offset);
+ cur_chain = cur_chain->next;
+ j++;
+ }
+ printk(KERN_CONT "\n");
+ }
+}
+
+/**
+ * toi_extent_chain_next - advance the current chain's position by one block
+ **/
+static void toi_extent_chain_next(void)
+{
+ struct toi_bdev_info *this = toi_writer_posn.current_chain;
+
+ if (!this->blocks.current_extent)
+ return;
+
+ if (this->blocks.current_offset == this->blocks.current_extent->end) {
+ if (this->blocks.current_extent->next) {
+ this->blocks.current_extent =
+ this->blocks.current_extent->next;
+ this->blocks.current_offset =
+ this->blocks.current_extent->start;
+ } else {
+ this->blocks.current_extent = NULL;
+ this->blocks.current_offset = 0;
+ }
+ } else
+ this->blocks.current_offset++;
+}
+
+/**
+ * __find_next_chain_same_prio - find the next usable chain with the same priority
+ */
+
+static struct toi_bdev_info *__find_next_chain_same_prio(void)
+{
+ struct toi_bdev_info *start_chain = toi_writer_posn.current_chain;
+ struct toi_bdev_info *this = start_chain;
+ int orig_prio = this->prio;
+
+ do {
+ this = this->next;
+
+ if (!this)
+ this = prio_chain_head;
+
+ /* Back on original chain? Use it again. */
+ if (this == start_chain)
+ return start_chain;
+
+ } while (!this->blocks.current_extent || this->prio != orig_prio);
+
+ return this;
+}
+
+static void find_next_chain(void)
+{
+ struct toi_bdev_info *this;
+
+ this = __find_next_chain_same_prio();
+
+ /*
+ * If we didn't get another chain of the same priority that we
+ * can use, look for the next priority.
+ */
+ while (this && !this->blocks.current_extent)
+ this = this->next;
+
+ toi_writer_posn.current_chain = this;
+}
+
+/**
+ * toi_extent_state_next - go to the next extent
+ * @blocks: The number of values to progress.
+ * @current_stream: The stream being accessed; the header stream (0) is not striped.
+ *
+ * Given a state, progress to the next valid entry. We may begin in an
+ * invalid state, as we do when invoked after extent_state_goto_start below.
+ *
+ * When using compression and expected_compression > 0, we let the image size
+ * be larger than storage, so we can validly run out of data to return.
+ **/
+static unsigned long toi_extent_state_next(int blocks, int current_stream)
+{
+ int i;
+
+ if (!toi_writer_posn.current_chain)
+ return -ENOSPC;
+
+ /* Assume chains always have lengths that are multiples of @blocks */
+ for (i = 0; i < blocks; i++)
+ toi_extent_chain_next();
+
+ /* The header stream is not striped */
+ if (current_stream ||
+ !toi_writer_posn.current_chain->blocks.current_extent)
+ find_next_chain();
+
+ return toi_writer_posn.current_chain ? 0 : -ENOSPC;
+}
+
+static void toi_insert_chain_in_prio_list(struct toi_bdev_info *this)
+{
+ struct toi_bdev_info **prev_ptr;
+ struct toi_bdev_info *cur;
+
+ /* Loop through the existing chain, finding where to insert it */
+ prev_ptr = &prio_chain_head;
+ cur = prio_chain_head;
+
+ while (cur && cur->prio >= this->prio) {
+ prev_ptr = &cur->next;
+ cur = cur->next;
+ }
+
+ this->next = *prev_ptr;
+ *prev_ptr = this;
+
+ this = prio_chain_head;
+ while (this)
+ this = this->next;
+ num_chains++;
+}
+
+/**
+ * toi_extent_state_goto_start - reinitialize an extent chain iterator
+ * @state: Iterator to reinitialize
+ **/
+void toi_extent_state_goto_start(void)
+{
+ struct toi_bdev_info *this = prio_chain_head;
+
+ while (this) {
+ toi_message(TOI_BIO, TOI_VERBOSE, 0,
+ "Setting current extent to %p.", this->blocks.first);
+ this->blocks.current_extent = this->blocks.first;
+ if (this->blocks.current_extent) {
+ toi_message(TOI_BIO, TOI_VERBOSE, 0,
+ "Setting current offset to %lu.",
+ this->blocks.current_extent->start);
+ this->blocks.current_offset =
+ this->blocks.current_extent->start;
+ }
+
+ this = this->next;
+ }
+
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Setting current chain to %p.",
+ prio_chain_head);
+ toi_writer_posn.current_chain = prio_chain_head;
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Leaving extent state goto start.");
+}
+
+/**
+ * toi_extent_state_save - save state of the iterator
+ * @state: Current state of the chain
+ * @saved_state: Iterator to populate
+ *
+ * Given a state and a struct hibernate_extent_state_store, save the current
+ * position in a format that can be used with relocated chains (at
+ * resume time).
+ **/
+void toi_extent_state_save(int slot)
+{
+ struct toi_bdev_info *cur_chain = prio_chain_head;
+ struct hibernate_extent *extent;
+ struct hibernate_extent_saved_state *chain_state;
+ int i = 0;
+
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_extent_state_save, slot %d.",
+ slot);
+
+ if (!toi_writer_posn.current_chain) {
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "No current chain => "
+ "chain_num = -1.");
+ toi_writer_posn.saved_chain_number[slot] = -1;
+ return;
+ }
+
+ while (cur_chain) {
+ i++;
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Saving chain %d (%p) "
+ "state, slot %d.", i, cur_chain, slot);
+
+ chain_state = &cur_chain->saved_state[slot];
+
+ chain_state->offset = cur_chain->blocks.current_offset;
+
+ if (toi_writer_posn.current_chain == cur_chain) {
+ toi_writer_posn.saved_chain_number[slot] = i;
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "This is the chain "
+ "we were on => chain_num is %d.", i);
+ }
+
+ if (!cur_chain->blocks.current_extent) {
+ chain_state->extent_num = 0;
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "No current extent "
+ "for this chain => extent_num %d is 0.",
+ i);
+ cur_chain = cur_chain->next;
+ continue;
+ }
+
+ extent = cur_chain->blocks.first;
+ chain_state->extent_num = 1;
+
+ while (extent != cur_chain->blocks.current_extent) {
+ chain_state->extent_num++;
+ extent = extent->next;
+ }
+
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "extent num %d is %d.", i,
+ chain_state->extent_num);
+
+ cur_chain = cur_chain->next;
+ }
+ toi_message(TOI_BIO, TOI_VERBOSE, 0,
+ "Completed saving extent state slot %d.", slot);
+}
+
+/**
+ * toi_extent_state_restore - restore the position saved by extent_state_save
+ * @slot: Saved-state slot to restore from.
+ **/
+void toi_extent_state_restore(int slot)
+{
+ int i = 0;
+ struct toi_bdev_info *cur_chain = prio_chain_head;
+ struct hibernate_extent_saved_state *chain_state;
+
+ toi_message(TOI_BIO, TOI_VERBOSE, 0,
+ "toi_extent_state_restore - slot %d.", slot);
+
+ if (toi_writer_posn.saved_chain_number[slot] == -1) {
+ toi_writer_posn.current_chain = NULL;
+ return;
+ }
+
+ while (cur_chain) {
+ int posn;
+ int j;
+ i++;
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Restoring chain %d (%p) "
+ "state, slot %d.", i, cur_chain, slot);
+
+ chain_state = &cur_chain->saved_state[slot];
+
+ posn = chain_state->extent_num;
+
+ cur_chain->blocks.current_extent = cur_chain->blocks.first;
+ cur_chain->blocks.current_offset = chain_state->offset;
+
+ if (i == toi_writer_posn.saved_chain_number[slot]) {
+ toi_writer_posn.current_chain = cur_chain;
+ toi_message(TOI_BIO, TOI_VERBOSE, 0,
+ "Found current chain.");
+ }
+
+ for (j = 0; j < 4; j++)
+ if (i == toi_writer_posn.saved_chain_number[j]) {
+ toi_writer_posn.saved_chain_ptr[j] = cur_chain;
+ toi_message(TOI_BIO, TOI_VERBOSE, 0,
+ "Found saved chain ptr %d (%p) (offset"
+ " %d).", j, cur_chain,
+ cur_chain->saved_state[j].offset);
+ }
+
+ if (posn) {
+ while (--posn)
+ cur_chain->blocks.current_extent =
+ cur_chain->blocks.current_extent->next;
+ } else
+ cur_chain->blocks.current_extent = NULL;
+
+ cur_chain = cur_chain->next;
+ }
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Done.");
+ if (test_action_state(TOI_LOGALL))
+ dump_block_chains();
+}
+
+/*
+ * Storage needed
+ *
+ * Returns amount of space in the image header required
+ * for the chain data. This ignores the links between
+ * pages, which we factor in when allocating the space.
+ */
+int toi_bio_devinfo_storage_needed(void)
+{
+ int result = sizeof(num_chains);
+ struct toi_bdev_info *chain = prio_chain_head;
+
+ while (chain) {
+ result += metadata_size;
+
+ /* Chain size */
+ result += sizeof(int);
+
+ /* Extents */
+ result += (2 * sizeof(unsigned long) *
+ chain->blocks.num_extents);
+
+ chain = chain->next;
+ }
+
+ result += 4 * sizeof(int);
+ return result;
+}
+
+static unsigned long chain_pages_used(struct toi_bdev_info *chain)
+{
+ struct hibernate_extent *this = chain->blocks.first;
+ struct hibernate_extent_saved_state *state = &chain->saved_state[3];
+ unsigned long size = 0;
+ int extent_idx = 1;
+
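+	/* No saved position for this chain: it was either empty or fully used. */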
+ if (!state->extent_num) {
+ if (!this)
+ return 0;
+ else
+ return chain->blocks.size;
+ }
+
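+	/* Sum the sizes of the extents fully used before the one we stopped in. */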
+ while (extent_idx < state->extent_num) {
+ size += (this->end - this->start + 1);
+ this = this->next;
+ extent_idx++;
+ }
+
+	/* Count the blocks used in the extent we stopped in, excluding the one we're sitting on */
+ return size + state->offset - this->start;
+}
+
+void toi_bio_free_unused_storage_chain(struct toi_bdev_info *chain)
+{
+ unsigned long used = chain_pages_used(chain);
+
+ /* Free the storage */
+ unsigned long first_freed = 0;
+
+ if (chain->allocator->bio_allocator_ops->free_unused_storage)
+ first_freed = chain->allocator->bio_allocator_ops->free_unused_storage(chain, used);
+
+	printk(KERN_DEBUG "Used %lu blocks in this chain. First extent freed is %lx.\n", used, first_freed);
+
+ /* Adjust / free the extents. */
+ toi_put_extent_chain_from(&chain->blocks, first_freed);
+
+ {
+ struct hibernate_extent *this = chain->blocks.first;
+ while (this) {
+			printk(KERN_DEBUG "Extent %lx-%lx.\n", this->start, this->end);
+ this = this->next;
+ }
+ }
+}
+
+/**
+ * toi_serialise_extent_chain - write a chain in the image
+ * @chain: Chain to write.
+ **/
+static int toi_serialise_extent_chain(struct toi_bdev_info *chain)
+{
+ struct hibernate_extent *this;
+ int ret;
+ int i = 1;
+
+ chain->pages_used = chain_pages_used(chain);
+
+ if (test_action_state(TOI_LOGALL))
+ dump_block_chains();
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Serialising chain (dev_t %lx).",
+ chain->dev_t);
+ /* Device info - dev_t, prio, bmap_shift, blocks per page, positions */
+ ret = toiActiveAllocator->rw_header_chunk(WRITE, &toi_blockwriter_ops,
+ (char *) &chain->uuid, metadata_size);
+ if (ret)
+ return ret;
+
+ /* Num extents */
+ ret = toiActiveAllocator->rw_header_chunk(WRITE, &toi_blockwriter_ops,
+ (char *) &chain->blocks.num_extents, sizeof(int));
+ if (ret)
+ return ret;
+
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "%d extents.",
+ chain->blocks.num_extents);
+
+ this = chain->blocks.first;
+ while (this) {
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Extent %d.", i);
+ ret = toiActiveAllocator->rw_header_chunk(WRITE,
+ &toi_blockwriter_ops,
+ (char *) this, 2 * sizeof(this->start));
+ if (ret)
+ return ret;
+ this = this->next;
+ i++;
+ }
+
+ return ret;
+}
+
+int toi_serialise_extent_chains(void)
+{
+ struct toi_bdev_info *this = prio_chain_head;
+ int result;
+
+ /* Write the number of chains */
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Write number of chains (%d)",
+ num_chains);
+ result = toiActiveAllocator->rw_header_chunk(WRITE,
+ &toi_blockwriter_ops, (char *) &num_chains,
+ sizeof(int));
+ if (result)
+ return result;
+
+ /* Then the chains themselves */
+ while (this) {
+ result = toi_serialise_extent_chain(this);
+ if (result)
+ return result;
+ this = this->next;
+ }
+
+ /*
+ * Finally, the chain we should be on at the start of each
+ * section.
+ */
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Saved chain numbers.");
+ result = toiActiveAllocator->rw_header_chunk(WRITE,
+ &toi_blockwriter_ops,
+ (char *) &toi_writer_posn.saved_chain_number[0],
+ 4 * sizeof(int));
+
+ return result;
+}
+
+int toi_register_storage_chain(struct toi_bdev_info *new)
+{
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Inserting chain %p into list.",
+ new);
+ toi_insert_chain_in_prio_list(new);
+ return 0;
+}
+
+static void free_bdev_info(struct toi_bdev_info *chain)
+{
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Free chain %p.", chain);
+
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, " - Block extents.");
+ toi_put_extent_chain(&chain->blocks);
+
+ /*
+ * The allocator may need to do more than just free the chains
+ * (swap_free, for example). Don't call from boot kernel.
+ */
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, " - Allocator extents.");
+ if (chain->allocator)
+ chain->allocator->bio_allocator_ops->free_storage(chain);
+
+ /*
+ * Dropping out of reading atomic copy? Need to undo
+ * toi_open_by_devnum.
+ */
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, " - Bdev.");
+ if (chain->bdev && !IS_ERR(chain->bdev) &&
+ chain->bdev != resume_block_device &&
+ chain->bdev != header_block_device &&
+ test_toi_state(TOI_TRYING_TO_RESUME))
+ toi_close_bdev(chain->bdev);
+
+ /* Poison */
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, " - Struct.");
+ toi_kfree(39, chain, sizeof(*chain));
+
+ if (prio_chain_head == chain)
+ prio_chain_head = NULL;
+
+ num_chains--;
+}
+
+void free_all_bdev_info(void)
+{
+ struct toi_bdev_info *this = prio_chain_head;
+
+ while (this) {
+ struct toi_bdev_info *next = this->next;
+ free_bdev_info(this);
+ this = next;
+ }
+
+ memset((char *) &toi_writer_posn, 0, sizeof(toi_writer_posn));
+ prio_chain_head = NULL;
+}
+
+static void set_up_start_position(void)
+{
+ toi_writer_posn.current_chain = prio_chain_head;
+ go_next_page(0, 0);
+}
+
+/**
+ * toi_load_extent_chain - read back a chain saved in the image
+ * @index: Index of the chain being loaded.
+ * @num_loaded: Running count of extents loaded so far; updated here.
+ *
+ * Allocate a new toi_bdev_info, read back its device info and extents from
+ * the image, and reconstruct the linked list of extents.
+ **/
+int toi_load_extent_chain(int index, int *num_loaded)
+{
+ struct toi_bdev_info *chain = toi_kzalloc(39,
+ sizeof(struct toi_bdev_info), GFP_ATOMIC);
+ struct hibernate_extent *this, *last = NULL;
+ int i, ret;
+
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Loading extent chain %d.", index);
+ /* Get dev_t, prio, bmap_shift, blocks per page, positions */
+ ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ, NULL,
+ (char *) &chain->uuid, metadata_size);
+
+ if (ret) {
+		printk(KERN_ERR "Failed to read extent chain device info.\n");
+ toi_kfree(39, chain, sizeof(*chain));
+ return 1;
+ }
+
+ toi_bkd.pages_used[index] = chain->pages_used;
+
+ ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ, NULL,
+ (char *) &chain->blocks.num_extents, sizeof(int));
+ if (ret) {
+ printk(KERN_ERR "Failed to read the size of extent chain.\n");
+ toi_kfree(39, chain, sizeof(*chain));
+ return 1;
+ }
+
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "%d extents.",
+ chain->blocks.num_extents);
+
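+	/* Read each extent (a start/end pair) and link it onto the chain. */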
+ for (i = 0; i < chain->blocks.num_extents; i++) {
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Extent %d.", i + 1);
+
+ this = toi_kzalloc(2, sizeof(struct hibernate_extent),
+ TOI_ATOMIC_GFP);
+ if (!this) {
+ printk(KERN_INFO "Failed to allocate a new extent.\n");
+ free_bdev_info(chain);
+ return -ENOMEM;
+ }
+ this->next = NULL;
+ /* Get the next page */
+ ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ,
+ NULL, (char *) this, 2 * sizeof(this->start));
+ if (ret) {
+ printk(KERN_INFO "Failed to read an extent.\n");
+ toi_kfree(2, this, sizeof(struct hibernate_extent));
+ free_bdev_info(chain);
+ return 1;
+ }
+
+ if (last)
+ last->next = this;
+ else {
+ char b1[32], b2[32], b3[32];
+ /*
+ * Open the bdev
+ */
+ toi_message(TOI_BIO, TOI_VERBOSE, 0,
+				    "Chain dev_t is %s. Resume dev_t is %s. Header"
+				    " dev_t is %s.\n",
+ format_dev_t(b1, chain->dev_t),
+ format_dev_t(b2, resume_dev_t),
+ format_dev_t(b3, toi_sig_data->header_dev_t));
+
+ if (chain->dev_t == resume_dev_t)
+ chain->bdev = resume_block_device;
+ else if (chain->dev_t == toi_sig_data->header_dev_t)
+ chain->bdev = header_block_device;
+ else {
+ chain->bdev = toi_open_bdev(chain->uuid,
+ chain->dev_t, 1);
+ if (IS_ERR(chain->bdev)) {
+ free_bdev_info(chain);
+ return -ENODEV;
+ }
+ }
+
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Chain bmap shift "
+ "is %d and blocks per page is %d.",
+ chain->bmap_shift,
+ chain->blocks_per_page);
+
+ chain->blocks.first = this;
+
+ /*
+ * Couldn't do this earlier, but can't do
+ * goto_start now - we may have already used blocks
+ * in the first chain.
+ */
+ chain->blocks.current_extent = this;
+ chain->blocks.current_offset = this->start;
+
+ /*
+ * Can't wait until we've read the whole chain
+ * before we insert it in the list. We might need
+ * this chain to read the next page in the header
+ */
+ toi_insert_chain_in_prio_list(chain);
+ }
+
+ /*
+ * We have to wait until 2 extents are loaded before setting up
+ * properly because if the first extent has only one page, we
+ * will need to put the position on the second extent. Sounds
+ * obvious, but it wasn't!
+ */
+ (*num_loaded)++;
+ if ((*num_loaded) == 2)
+ set_up_start_position();
+ last = this;
+ }
+
+ /*
+ * Shouldn't get empty chains, but it's not impossible. Link them in so
+ * they get freed properly later.
+ */
+ if (!chain->blocks.num_extents)
+ toi_insert_chain_in_prio_list(chain);
+
+ if (!chain->blocks.current_extent) {
+ chain->blocks.current_extent = chain->blocks.first;
+ if (chain->blocks.current_extent)
+ chain->blocks.current_offset =
+ chain->blocks.current_extent->start;
+ }
+ return 0;
+}
+
+int toi_load_extent_chains(void)
+{
+ int result;
+ int to_load;
+ int i;
+ int extents_loaded = 0;
+
+ result = toiActiveAllocator->rw_header_chunk_noreadahead(READ, NULL,
+ (char *) &to_load,
+ sizeof(int));
+ if (result)
+ return result;
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "%d chains to read.", to_load);
+
+ for (i = 0; i < to_load; i++) {
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, " >> Loading chain %d/%d.",
+ i, to_load);
+ result = toi_load_extent_chain(i, &extents_loaded);
+ if (result)
+ return result;
+ }
+
+ /* If we never got to a second extent, we still need to do this. */
+ if (extents_loaded == 1)
+ set_up_start_position();
+
+	toi_message(TOI_BIO, TOI_VERBOSE, 0, "Read saved chain numbers.");
+ result = toiActiveAllocator->rw_header_chunk_noreadahead(READ,
+ &toi_blockwriter_ops,
+ (char *) &toi_writer_posn.saved_chain_number[0],
+ 4 * sizeof(int));
+
+ return result;
+}
+
+static int toi_end_of_stream(int writing, int section_barrier)
+{
+ struct toi_bdev_info *cur_chain = toi_writer_posn.current_chain;
+ int compare_to = next_section[current_stream];
+ struct toi_bdev_info *compare_chain =
+ toi_writer_posn.saved_chain_ptr[compare_to];
+ int compare_offset = compare_chain ?
+ compare_chain->saved_state[compare_to].offset : 0;
+
+ if (!section_barrier)
+ return 0;
+
+ if (!cur_chain)
+ return 1;
+
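+	/* Reaching the saved start position of the next section means the end of this stream. */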
+ if (cur_chain == compare_chain &&
+ cur_chain->blocks.current_offset == compare_offset) {
+ if (writing) {
+ if (!current_stream) {
+ debug_broken_header();
+ return 1;
+ }
+ } else {
+ more_readahead = 0;
+ toi_message(TOI_BIO, TOI_VERBOSE, 0,
+ "Reached the end of stream %d "
+ "(not an error).", current_stream);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * go_next_page - skip blocks to the start of the next page
+ * @writing: Whether we're reading or writing the image.
+ * @section_barrier: Whether to stop at the saved boundary of the next section.
+ *
+ * Go forward one page.
+ **/
+int go_next_page(int writing, int section_barrier)
+{
+ struct toi_bdev_info *cur_chain = toi_writer_posn.current_chain;
+ int max = cur_chain ? cur_chain->blocks_per_page : 1;
+
+	/* Go forward a page - or maybe two. Don't stripe the header,
+ * so that bad fragmentation doesn't put the extent data containing
+ * the location of the second page out of the first header page.
+ */
+ if (toi_extent_state_next(max, current_stream)) {
+ /* Don't complain if readahead falls off the end */
+ if (writing && section_barrier) {
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Extent state eof. "
+ "Expected compression ratio too optimistic?");
+ if (test_action_state(TOI_LOGALL))
+ dump_block_chains();
+ }
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Ran out of extents to "
+				"read/write. (Not necessarily a fatal error.)");
+ return -ENOSPC;
+ }
+
+ return 0;
+}
+
+int devices_of_same_priority(struct toi_bdev_info *this)
+{
+ struct toi_bdev_info *check = prio_chain_head;
+ int i = 0;
+
+ while (check) {
+ if (check->prio == this->prio)
+ i++;
+ check = check->next;
+ }
+
+ return i;
+}
+
+/**
+ * toi_bio_rw_page - do i/o on the next disk page in the image
+ * @writing: Whether reading or writing.
+ * @page: Page to do i/o on.
+ * @is_readahead: Whether we're doing readahead
+ * @free_group: The group used in allocating the page
+ *
+ * Submit a page for reading or writing, possibly readahead.
+ * Pass the group used in allocating the page as well, as it should
+ * be freed on completion of the bio if we're writing the page.
+ **/
+int toi_bio_rw_page(int writing, struct page *page,
+ int is_readahead, int free_group)
+{
+ int result = toi_end_of_stream(writing, 1);
+ struct toi_bdev_info *dev_info = toi_writer_posn.current_chain;
+
+ if (result) {
+ if (writing)
+ abort_hibernate(TOI_INSUFFICIENT_STORAGE,
+ "Insufficient storage for your image.");
+ else
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Seeking to "
+ "read/write another page when stream has "
+ "ended.");
+ return -ENOSPC;
+ }
+
+ toi_message(TOI_BIO, TOI_VERBOSE, 0,
+ "%s %lx:%ld",
+ writing ? "Write" : "Read",
+ dev_info->dev_t, dev_info->blocks.current_offset);
+
+ result = toi_do_io(writing, dev_info->bdev,
+ dev_info->blocks.current_offset << dev_info->bmap_shift,
+ page, is_readahead, 0, free_group);
+
+	/* Ignore the result here - we'll check for end of stream if we come in again */
+ go_next_page(writing, 1);
+
+ if (result)
+ printk(KERN_ERR "toi_do_io returned %d.\n", result);
+ return result;
+}
+
+dev_t get_header_dev_t(void)
+{
+ return prio_chain_head->dev_t;
+}
+
+struct block_device *get_header_bdev(void)
+{
+ return prio_chain_head->bdev;
+}
+
+unsigned long get_headerblock(void)
+{
+ return prio_chain_head->blocks.first->start <<
+ prio_chain_head->bmap_shift;
+}
+
+int get_main_pool_phys_params(void)
+{
+ struct toi_bdev_info *this = prio_chain_head;
+ int result;
+
+ while (this) {
+ result = this->allocator->bio_allocator_ops->bmap(this);
+ if (result)
+ return result;
+ this = this->next;
+ }
+
+ return 0;
+}
+
+static int apply_header_reservation(void)
+{
+ int i;
+
+ if (!header_pages_reserved) {
+ toi_message(TOI_BIO, TOI_VERBOSE, 0,
+ "No header pages reserved at the moment.");
+ return 0;
+ }
+
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Applying header reservation.");
+
+ /* Apply header space reservation */
+ toi_extent_state_goto_start();
+
+ for (i = 0; i < header_pages_reserved; i++)
+ if (go_next_page(1, 0))
+ return -ENOSPC;
+
+ /* The end of header pages will be the start of pageset 2 */
+ toi_extent_state_save(2);
+
+ toi_message(TOI_BIO, TOI_VERBOSE, 0,
+ "Finished applying header reservation.");
+ return 0;
+}
+
+static int toi_bio_register_storage(void)
+{
+ int result = 0;
+ struct toi_module_ops *this_module;
+
+ list_for_each_entry(this_module, &toi_modules, module_list) {
+ if (!this_module->enabled ||
+ this_module->type != BIO_ALLOCATOR_MODULE)
+ continue;
+ toi_message(TOI_BIO, TOI_VERBOSE, 0,
+ "Registering storage from %s.",
+ this_module->name);
+ result = this_module->bio_allocator_ops->register_storage();
+ if (result)
+ break;
+ }
+
+ return result;
+}
+
+void toi_bio_free_unused_storage(void)
+{
+ struct toi_bdev_info *this = prio_chain_head;
+
+ while (this) {
+ toi_bio_free_unused_storage_chain(this);
+ this = this->next;
+ }
+}
+
+int toi_bio_allocate_storage(unsigned long request)
+{
+ struct toi_bdev_info *chain = prio_chain_head;
+ unsigned long to_get = request;
+ unsigned long extra_pages, needed;
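+	/*
+	 * no_free counts allocators in the current priority group that returned
+	 * nothing, so the next pass divides the remainder among fewer chains.
+	 */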
+ int no_free = 0;
+
+ if (!chain) {
+ int result = toi_bio_register_storage();
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_allocate_storage: "
+ "Registering storage.");
+ if (result)
+ return 0;
+ chain = prio_chain_head;
+ if (!chain) {
+			printk(KERN_ERR "TuxOnIce: No storage was registered.\n");
+ return 0;
+ }
+ }
+
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_allocate_storage: "
+ "Request is %lu pages.", request);
+ extra_pages = DIV_ROUND_UP(request * (sizeof(unsigned long)
+ + sizeof(int)), PAGE_SIZE);
+ needed = request + extra_pages + header_pages_reserved;
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Adding %lu extra pages and %lu "
+ "for header => %lu.",
+ extra_pages, header_pages_reserved, needed);
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Already allocated %lu pages.",
+ raw_pages_allocd);
+
+ to_get = needed > raw_pages_allocd ? needed - raw_pages_allocd : 0;
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Need to get %lu pages.", to_get);
+
+ if (!to_get)
+ return apply_header_reservation();
+
+ while (to_get && chain) {
+ int num_group = devices_of_same_priority(chain);
+ int divisor = num_group - no_free;
+ int i;
+ unsigned long portion = DIV_ROUND_UP(to_get, divisor);
+ unsigned long got = 0;
+ unsigned long got_this_round = 0;
+ struct toi_bdev_info *top = chain;
+
+ toi_message(TOI_BIO, TOI_VERBOSE, 0,
+ " Start of loop. To get is %lu. Divisor is %d.",
+ to_get, divisor);
+ no_free = 0;
+
+ /*
+ * We're aiming to spread the allocated storage as evenly
+ * as possible, but we also want to get all the storage we
+ * can off this priority.
+ */
+ for (i = 0; i < num_group; i++) {
+ struct toi_bio_allocator_ops *ops =
+ chain->allocator->bio_allocator_ops;
+ toi_message(TOI_BIO, TOI_VERBOSE, 0,
+ " Asking for %lu pages from chain %p.",
+ portion, chain);
+ got = ops->allocate_storage(chain, portion);
+ toi_message(TOI_BIO, TOI_VERBOSE, 0,
+ " Got %lu pages from allocator %p.",
+ got, chain);
+ if (!got)
+ no_free++;
+ got_this_round += got;
+ chain = chain->next;
+ }
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, " Loop finished. Got a "
+ "total of %lu pages from %d allocators.",
+ got_this_round, divisor - no_free);
+
+ raw_pages_allocd += got_this_round;
+ to_get = needed > raw_pages_allocd ? needed - raw_pages_allocd :
+ 0;
+
+ /*
+ * If we got anything from chains of this priority and we
+ * still have storage to allocate, go over this priority
+ * again.
+ */
+ if (got_this_round && to_get)
+ chain = top;
+ else
+ no_free = 0;
+ }
+
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Finished allocating. Calling "
+ "get_main_pool_phys_params");
+ /* Now let swap allocator bmap the pages */
+ get_main_pool_phys_params();
+
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Done. Reserving header.");
+ return apply_header_reservation();
+}
+
+void toi_bio_chains_post_atomic(struct toi_boot_kernel_data *bkd)
+{
+ int i = 0;
+ struct toi_bdev_info *cur_chain = prio_chain_head;
+
+ while (cur_chain) {
+ cur_chain->pages_used = bkd->pages_used[i];
+ cur_chain = cur_chain->next;
+ i++;
+ }
+}
+
+int toi_bio_chains_debug_info(char *buffer, int size)
+{
+ /* Show what we actually used */
+ struct toi_bdev_info *cur_chain = prio_chain_head;
+ int len = 0;
+
+ while (cur_chain) {
+ len += scnprintf(buffer + len, size - len, " Used %lu pages "
+ "from %s.\n", cur_chain->pages_used,
+ cur_chain->name);
+ cur_chain = cur_chain->next;
+ }
+
+ return len;
+}
+
+void toi_bio_store_inc_image_ptr(struct toi_incremental_image_pointer *ptr)
+{
+ struct toi_bdev_info *this = toi_writer_posn.current_chain,
+ *cmp = prio_chain_head;
+
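+	/* Record the 1-based index of the current chain within the priority list. */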
+ ptr->save.chain = 1;
+ while (this != cmp) {
+ ptr->save.chain++;
+ cmp = cmp->next;
+ }
+ ptr->save.block = this->blocks.current_offset;
+
+ /* Save the raw info internally for quicker access when updating pointers */
+ ptr->bdev = this->bdev;
+ ptr->block = this->blocks.current_offset << this->bmap_shift;
+}
+
+void toi_bio_restore_inc_image_ptr(struct toi_incremental_image_pointer *ptr)
+{
+ int i = ptr->save.chain - 1;
+ struct toi_bdev_info *this;
+ struct hibernate_extent *hib;
+
+ /* Find chain by stored index */
+ this = prio_chain_head;
+ while (i) {
+ this = this->next;
+ i--;
+ }
+ toi_writer_posn.current_chain = this;
+
+ /* Restore block */
+ this->blocks.current_offset = ptr->save.block;
+
+ /* Find current offset from block number */
+ hib = this->blocks.first;
+
+ while (hib->start > ptr->save.block) {
+ hib = hib->next;
+ }
+
+ this->blocks.last_touched = this->blocks.current_extent = hib;
+}
diff --git a/kernel/power/tuxonice_bio_core.c b/kernel/power/tuxonice_bio_core.c
new file mode 100644
index 000000000..d18f2751c
--- /dev/null
+++ b/kernel/power/tuxonice_bio_core.c
@@ -0,0 +1,1933 @@
+/*
+ * kernel/power/tuxonice_bio.c
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ * This file contains block io functions for TuxOnIce. These are
+ * used by the swapwriter and it is planned that they will also
+ * be used by the NFSwriter.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/syscalls.h>
+#include <linux/suspend.h>
+#include <linux/ctype.h>
+#include <linux/fs_uuid.h>
+#include <linux/mount.h>
+
+#include "tuxonice.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice_bio.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_io.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_bio_internal.h"
+
+#define MEMORY_ONLY 1
+#define THROTTLE_WAIT 2
+
+/* #define MEASURE_MUTEX_CONTENTION */
+#ifndef MEASURE_MUTEX_CONTENTION
+#define my_mutex_lock(index, the_lock) mutex_lock(the_lock)
+#define my_mutex_unlock(index, the_lock) mutex_unlock(the_lock)
+#else
+unsigned long mutex_times[2][2][NR_CPUS];
+#define my_mutex_lock(index, the_lock) do { \
+ int have_mutex; \
+ have_mutex = mutex_trylock(the_lock); \
+ if (!have_mutex) { \
+ mutex_lock(the_lock); \
+ mutex_times[index][0][smp_processor_id()]++; \
+ } else { \
+ mutex_times[index][1][smp_processor_id()]++; \
+ }
+
+#define my_mutex_unlock(index, the_lock) \
+ mutex_unlock(the_lock); \
+} while (0)
+#endif
+
+static int page_idx, reset_idx;
+
+static int target_outstanding_io = 1024;
+static int max_outstanding_writes, max_outstanding_reads;
+
+static struct page *bio_queue_head, *bio_queue_tail;
+static atomic_t toi_bio_queue_size;
+static DEFINE_SPINLOCK(bio_queue_lock);
+
+static int free_mem_throttle, throughput_throttle;
+int more_readahead = 1;
+static struct page *readahead_list_head, *readahead_list_tail;
+
+static struct page *waiting_on;
+
+static atomic_t toi_io_in_progress, toi_io_done;
+static DECLARE_WAIT_QUEUE_HEAD(num_in_progress_wait);
+
+int current_stream;
+/* Not static, so that the allocators can setup and complete
+ * writing the header */
+char *toi_writer_buffer;
+int toi_writer_buffer_posn;
+
+static DEFINE_MUTEX(toi_bio_mutex);
+static DEFINE_MUTEX(toi_bio_readahead_mutex);
+
+static struct task_struct *toi_queue_flusher;
+static int toi_bio_queue_flush_pages(int dedicated_thread);
+
+struct toi_module_ops toi_blockwriter_ops;
+
+struct toi_incremental_image_pointer toi_inc_ptr[2][2];
+
+#define TOTAL_OUTSTANDING_IO (atomic_read(&toi_io_in_progress) + \
+ atomic_read(&toi_bio_queue_size))
+
+unsigned long raw_pages_allocd, header_pages_reserved;
+
+static int toi_rw_buffer(int writing, char *buffer, int buffer_size,
+ int no_readahead);
+
+/**
+ * set_free_mem_throttle - set the point where we pause to avoid oom.
+ *
+ * Initially, this value is zero, but when we first fail to allocate memory,
+ * we set it (plus a buffer) and thereafter throttle i/o once that limit is
+ * reached.
+ **/
+static void set_free_mem_throttle(void)
+{
+ int new_throttle = nr_free_buffer_pages() + 256;
+
+ if (new_throttle > free_mem_throttle)
+ free_mem_throttle = new_throttle;
+}
+
+#define NUM_REASONS 7
+static atomic_t reasons[NUM_REASONS];
+static char *reason_name[NUM_REASONS] = {
+ "readahead not ready",
+ "bio allocation",
+ "synchronous I/O",
+ "toi_bio_get_new_page",
+ "memory low",
+ "readahead buffer allocation",
+ "throughput_throttle",
+};
+
+/* User Specified Parameters. */
+unsigned long resume_firstblock;
+dev_t resume_dev_t;
+struct block_device *resume_block_device;
+static atomic_t resume_bdev_open_count;
+
+struct block_device *header_block_device;
+
+/**
+ * toi_open_bdev: Open a bdev at resume time.
+ *
+ * uuid: If non NULL, the filesystem UUID to resolve to a device. If the
+ * lookup fails, we fall back to default_device.
+ * default_device: The dev_t (device major/minor) to open when no UUID is
+ * given or the UUID cannot be resolved.
+ * display_errs: Whether to report errors if the device can't be opened.
+ *
+ * We stored a dev_t in the image header. Open the matching device without
+ * requiring /dev/<whatever> in most cases and record the details needed
+ * to close it later and avoid duplicating work.
+ */
+struct block_device *toi_open_bdev(char *uuid, dev_t default_device,
+ int display_errs)
+{
+ struct block_device *bdev;
+ dev_t device = default_device;
+ char buf[32];
+ int retried = 0;
+
+retry:
+ if (uuid) {
+ struct fs_info seek;
+ strncpy((char *) &seek.uuid, uuid, 16);
+ seek.dev_t = 0;
+ seek.last_mount_size = 0;
+ device = blk_lookup_fs_info(&seek);
+ if (!device) {
+ device = default_device;
+ printk(KERN_DEBUG "Unable to resolve uuid. Falling back"
+ " to dev_t.\n");
+ } else
+ printk(KERN_DEBUG "Resolved uuid to device %s.\n",
+ format_dev_t(buf, device));
+ }
+
+ if (!device) {
+ printk(KERN_ERR "TuxOnIce attempting to open a "
+ "blank dev_t!\n");
+ dump_stack();
+ return NULL;
+ }
+ bdev = toi_open_by_devnum(device);
+
+ if (IS_ERR(bdev) || !bdev) {
+ if (!retried) {
+ retried = 1;
+ wait_for_device_probe();
+ goto retry;
+ }
+ if (display_errs)
+ toi_early_boot_message(1, TOI_CONTINUE_REQ,
+ "Failed to get access to block device "
+ "\"%x\" (error %d).\n Maybe you need "
+ "to run mknod and/or lvmsetup in an "
+ "initrd/ramfs?", device, bdev);
+ return ERR_PTR(-EINVAL);
+ }
+ toi_message(TOI_BIO, TOI_VERBOSE, 0,
+ "TuxOnIce got bdev %p for dev_t %x.",
+ bdev, device);
+
+ return bdev;
+}
+
+static void toi_bio_reserve_header_space(unsigned long request)
+{
+ header_pages_reserved = request;
+}
+
+/**
+ * do_bio_wait - wait for some TuxOnIce I/O to complete
+ * @reason: The array index of the reason we're waiting.
+ *
+ * Wait for a particular page of I/O if we're after a particular page.
+ * If we're not after a particular page, wait instead for all in flight
+ * I/O to be completed or for us to have enough free memory to be able
+ * to submit more I/O.
+ *
+ * If we wait, we also update our statistics regarding why we waited.
+ **/
+static void do_bio_wait(int reason)
+{
+ struct page *was_waiting_on = waiting_on;
+
+ /* On SMP, waiting_on can be reset, so we make a copy */
+ if (was_waiting_on) {
+ wait_on_page_locked(was_waiting_on);
+ atomic_inc(&reasons[reason]);
+ } else {
+ atomic_inc(&reasons[reason]);
+
+ wait_event(num_in_progress_wait,
+ !atomic_read(&toi_io_in_progress) ||
+ nr_free_buffer_pages() > free_mem_throttle);
+ }
+}
+
+/**
+ * throttle_if_needed - wait for I/O completion if throttle points are reached
+ * @flags: What to check and how to act.
+ *
+ * Check whether we need to wait for some I/O to complete. We always check
+ * whether we have enough memory available, but may also (depending upon
+ * @reason) check if the throughput throttle limit has been reached.
+ **/
+static int throttle_if_needed(int flags)
+{
+ int free_pages = nr_free_buffer_pages();
+
+ /* Getting low on memory and I/O is in progress? */
+ while (unlikely(free_pages < free_mem_throttle) &&
+ atomic_read(&toi_io_in_progress) &&
+ !test_result_state(TOI_ABORTED)) {
+ if (!(flags & THROTTLE_WAIT))
+ return -ENOMEM;
+ do_bio_wait(4);
+ free_pages = nr_free_buffer_pages();
+ }
+
+ while (!(flags & MEMORY_ONLY) && throughput_throttle &&
+ TOTAL_OUTSTANDING_IO >= throughput_throttle &&
+ !test_result_state(TOI_ABORTED)) {
+ int result = toi_bio_queue_flush_pages(0);
+ if (result)
+ return result;
+ atomic_inc(&reasons[6]);
+ wait_event(num_in_progress_wait,
+ !atomic_read(&toi_io_in_progress) ||
+ TOTAL_OUTSTANDING_IO < throughput_throttle);
+ }
+
+ return 0;
+}
+
+/**
+ * update_throughput_throttle - update the raw throughput throttle
+ * @jif_index: The number of times this function has been called.
+ *
+ * This function is called four times per second by the core, and used to limit
+ * the amount of I/O we submit at once, spreading out our waiting through the
+ * whole job and letting userui get an opportunity to do its work.
+ *
+ * We don't start limiting I/O until 1/4s has gone so that we get a
+ * decent sample for our initial limit, and keep updating it because
+ * throughput may vary (on rotating media, eg) with our block number.
+ *
+ * We throttle to 1/10s worth of I/O.
+ **/
+static void update_throughput_throttle(int jif_index)
+{
+ int done = atomic_read(&toi_io_done);
+ throughput_throttle = done * 2 / 5 / jif_index;
+}
+
+/**
+ * toi_finish_all_io - wait for all outstanding i/o to complete
+ *
+ * Flush any queued but unsubmitted I/O and wait for it all to complete.
+ **/
+static int toi_finish_all_io(void)
+{
+ int result = toi_bio_queue_flush_pages(0);
+ toi_bio_queue_flusher_should_finish = 1;
+ wake_up(&toi_io_queue_flusher);
+ wait_event(num_in_progress_wait, !TOTAL_OUTSTANDING_IO);
+ return result;
+}
+
+/**
+ * toi_end_bio - bio completion function.
+ * @bio: bio that has completed.
+ * @err: Error value. Yes, like end_swap_bio_read, we ignore it.
+ *
+ * Function called by the block driver from interrupt context when I/O is
+ * completed. If we were writing the page, we want to free it and will have
+ * set bio->bi_private to the parameter we should use in telling the page
+ * allocation accounting code what the page was allocated for. If we're
+ * reading the page, it will be in the singly linked list made from
+ * page->private pointers.
+ **/
+static void toi_end_bio(struct bio *bio, int err)
+{
+ struct page *page = bio->bi_io_vec[0].bv_page;
+
+ BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
+
+ unlock_page(page);
+ bio_put(bio);
+
+ if (waiting_on == page)
+ waiting_on = NULL;
+
+ put_page(page);
+
+ if (bio->bi_private)
+ toi__free_page((int) ((unsigned long) bio->bi_private) , page);
+
+ bio_put(bio);
+
+ atomic_dec(&toi_io_in_progress);
+ atomic_inc(&toi_io_done);
+
+ wake_up(&num_in_progress_wait);
+}
+
+/**
+ * submit - submit BIO request
+ * @writing: READ or WRITE.
+ * @dev: The block device we're using.
+ * @first_block: The first sector we're using.
+ * @page: The page being used for I/O.
+ * @free_group: If writing, the group that was used in allocating the page
+ * and which will be used in freeing the page from the completion
+ * routine.
+ *
+ * Based on Patrick Mochell's pmdisk code from long ago: "Straight from the
+ * textbook - allocate and initialize the bio. If we're writing, make sure
+ * the page is marked as dirty. Then submit it and carry on."
+ *
+ * If we're just testing the speed of our own code, we fake having done all
+ * the hard work and call toi_end_bio immediately.
+ **/
+static int submit(int writing, struct block_device *dev, sector_t first_block,
+ struct page *page, int free_group)
+{
+ struct bio *bio = NULL;
+ int cur_outstanding_io, result;
+
+ /*
+ * Shouldn't throttle if reading - can deadlock in the single
+ * threaded case as pages are only freed when we use the
+ * readahead.
+ */
+ if (writing) {
+ result = throttle_if_needed(MEMORY_ONLY | THROTTLE_WAIT);
+ if (result)
+ return result;
+ }
+
+ while (!bio) {
+ bio = bio_alloc(TOI_ATOMIC_GFP, 1);
+ if (!bio) {
+ set_free_mem_throttle();
+ do_bio_wait(1);
+ }
+ }
+
+ bio->bi_bdev = dev;
+ bio->bi_iter.bi_sector = first_block;
+ bio->bi_private = (void *) ((unsigned long) free_group);
+ bio->bi_end_io = toi_end_bio;
+ bio->bi_flags |= (1 << BIO_TOI);
+
+ if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
+ printk(KERN_DEBUG "ERROR: adding page to bio at %lld\n",
+ (unsigned long long) first_block);
+ bio_put(bio);
+ return -EFAULT;
+ }
+
+ bio_get(bio);
+
+ cur_outstanding_io = atomic_add_return(1, &toi_io_in_progress);
+ if (writing) {
+ if (cur_outstanding_io > max_outstanding_writes)
+ max_outstanding_writes = cur_outstanding_io;
+ } else {
+ if (cur_outstanding_io > max_outstanding_reads)
+ max_outstanding_reads = cur_outstanding_io;
+ }
+
+ /* Still read the header! */
+ if (unlikely(test_action_state(TOI_TEST_BIO) && writing)) {
+ /* Fake having done the hard work */
+ set_bit(BIO_UPTODATE, &bio->bi_flags);
+ toi_end_bio(bio, 0);
+ } else
+ submit_bio(writing | REQ_SYNC, bio);
+
+ return 0;
+}
+
+/**
+ * toi_do_io: Prepare to do some i/o on a page and submit or batch it.
+ *
+ * @writing: Whether reading or writing.
+ * @bdev: The block device which we're using.
+ * @block0: The first sector we're reading or writing.
+ * @page: The page on which I/O is being done.
+ * @is_readahead: Whether the page is being submitted as readahead.
+ * @syncio: Whether the i/o is being done synchronously.
+ * @free_group: If writing, the group used in allocating the page, passed to
+ * the completion routine so the page can be freed.
+ *
+ * Prepare and start a read or write operation.
+ *
+ * Note that we always work with our own page. If writing, we might be given a
+ * compression buffer that will immediately be used to start compressing the
+ * next page. For reading, we do readahead and therefore don't know the final
+ * address where the data needs to go.
+ **/
+int toi_do_io(int writing, struct block_device *bdev, long block0,
+ struct page *page, int is_readahead, int syncio, int free_group)
+{
+ page->private = 0;
+
+ /* Do here so we don't race against toi_bio_get_next_page_read */
+ lock_page(page);
+
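+	/* Readahead pages are chained into a singly linked list via page->private. */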
+ if (is_readahead) {
+ if (readahead_list_head)
+ readahead_list_tail->private = (unsigned long) page;
+ else
+ readahead_list_head = page;
+
+ readahead_list_tail = page;
+ }
+
+ /* Done before submitting to avoid races. */
+ if (syncio)
+ waiting_on = page;
+
+ /* Submit the page */
+ get_page(page);
+
+ if (submit(writing, bdev, block0, page, free_group))
+ return -EFAULT;
+
+ if (syncio)
+ do_bio_wait(2);
+
+ return 0;
+}
+
+/**
+ * toi_bdev_page_io - simpler interface to do directly i/o on a single page
+ * @writing: Whether reading or writing.
+ * @bdev: Block device on which we're operating.
+ * @pos: Sector at which page to read or write starts.
+ * @page: Page to be read/written.
+ *
+ * A simple interface to submit a page of I/O and wait for its completion.
+ * The caller must free the page used.
+ **/
+static int toi_bdev_page_io(int writing, struct block_device *bdev,
+ long pos, struct page *page)
+{
+ return toi_do_io(writing, bdev, pos, page, 0, 1, 0);
+}
+
+/**
+ * toi_bio_memory_needed - report the amount of memory needed for block i/o
+ *
+ * We want to have at least enough memory so as to have target_outstanding_io
+ * or more transactions on the fly at once. If we can do more, fine.
+ **/
+static int toi_bio_memory_needed(void)
+{
+ return target_outstanding_io * (PAGE_SIZE + sizeof(struct request) +
+ sizeof(struct bio));
+}
+
+/**
+ * toi_bio_print_debug_stats - put out debugging info in the buffer provided
+ * @buffer: A buffer of size @size into which text should be placed.
+ * @size: The size of @buffer.
+ *
+ * Fill a buffer with debugging info. This is used for both our debug_info sysfs
+ * entry and for recording the same info in dmesg.
+ **/
+static int toi_bio_print_debug_stats(char *buffer, int size)
+{
+ int len = 0;
+
+ if (toiActiveAllocator != &toi_blockwriter_ops) {
+ len = scnprintf(buffer, size,
+ "- Block I/O inactive.\n");
+ return len;
+ }
+
+ len = scnprintf(buffer, size, "- Block I/O active.\n");
+
+ len += toi_bio_chains_debug_info(buffer + len, size - len);
+
+ len += scnprintf(buffer + len, size - len,
+ "- Max outstanding reads %d. Max writes %d.\n",
+ max_outstanding_reads, max_outstanding_writes);
+
+ len += scnprintf(buffer + len, size - len,
+ " Memory_needed: %d x (%lu + %u + %u) = %d bytes.\n",
+ target_outstanding_io,
+ PAGE_SIZE, (unsigned int) sizeof(struct request),
+ (unsigned int) sizeof(struct bio), toi_bio_memory_needed());
+
+#ifdef MEASURE_MUTEX_CONTENTION
+ {
+ int i;
+
+ len += scnprintf(buffer + len, size - len,
+ " Mutex contention while reading:\n Contended Free\n");
+
+ for_each_online_cpu(i)
+ len += scnprintf(buffer + len, size - len,
+ " %9lu %9lu\n",
+ mutex_times[0][0][i], mutex_times[0][1][i]);
+
+ len += scnprintf(buffer + len, size - len,
+ " Mutex contention while writing:\n Contended Free\n");
+
+ for_each_online_cpu(i)
+ len += scnprintf(buffer + len, size - len,
+ " %9lu %9lu\n",
+ mutex_times[1][0][i], mutex_times[1][1][i]);
+
+ }
+#endif
+
+ return len + scnprintf(buffer + len, size - len,
+ " Free mem throttle point reached %d.\n", free_mem_throttle);
+}
+
+static int total_header_bytes;
+static int unowned;
+
+void debug_broken_header(void)
+{
+ printk(KERN_DEBUG "Image header too big for size allocated!\n");
+ print_toi_header_storage_for_modules();
+ printk(KERN_DEBUG "Page flags : %d.\n", toi_pageflags_space_needed());
+ printk(KERN_DEBUG "toi_header : %zu.\n", sizeof(struct toi_header));
+ printk(KERN_DEBUG "Total unowned : %d.\n", unowned);
+ printk(KERN_DEBUG "Total used : %d (%ld pages).\n", total_header_bytes,
+ DIV_ROUND_UP(total_header_bytes, PAGE_SIZE));
+ printk(KERN_DEBUG "Space needed now : %ld.\n",
+ get_header_storage_needed());
+ dump_block_chains();
+ abort_hibernate(TOI_HEADER_TOO_BIG, "Header reservation too small.");
+}
+
+static int toi_bio_update_previous_inc_img_ptr(int stream)
+{
+ int result;
+ char * buffer = (char *) toi_get_zeroed_page(12, TOI_ATOMIC_GFP);
+ struct page *page;
+ struct toi_incremental_image_pointer *prev, *this;
+
+ prev = &toi_inc_ptr[stream][0];
+ this = &toi_inc_ptr[stream][1];
+
+ if (!buffer) {
+ // We're at the start of writing a pageset. Memory should not be that scarce.
+ return -ENOMEM;
+ }
+
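+	/*
+	 * Read the page holding the previous pageset's pointer, patch in the
+	 * location of the new pageset and write it back.
+	 */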
+ page = virt_to_page(buffer);
+ result = toi_do_io(READ, prev->bdev, prev->block, page, 0, 1, 0);
+
+ if (result)
+ goto out;
+
+ memcpy(buffer, (char *) this, sizeof(this->save));
+
+ result = toi_do_io(WRITE, prev->bdev, prev->block, page, 0, 0, 12);
+
+ // If the IO is successfully submitted (!result), the page will be freed
+ // asynchronously on completion.
+out:
+ if (result)
+ toi__free_page(12, virt_to_page(buffer));
+ return result;
+}
+
+/**
+ * toi_write_init_incremental - incremental image part of setting up to write a new section
+ */
+static int toi_write_init_incremental(int stream)
+{
+ int result = 0;
+
+ // Remember the location of this block so we can link to it.
+ toi_bio_store_inc_image_ptr(&toi_inc_ptr[stream][1]);
+
+ // Update the pointer at the start of the last pageset with the same stream number.
+ result = toi_bio_update_previous_inc_img_ptr(stream);
+ if (result)
+ return result;
+
+ // Move the current to the previous slot.
+ memcpy(&toi_inc_ptr[stream][0], &toi_inc_ptr[stream][1], sizeof(toi_inc_ptr[stream][1]));
+
+ // Store a blank pointer at the start of this incremental pageset
+ memset(&toi_inc_ptr[stream][1], 0, sizeof(toi_inc_ptr[stream][1]));
+ result = toi_rw_buffer(WRITE, (char *) &toi_inc_ptr[stream][1], sizeof(toi_inc_ptr[stream][1]), 0);
+ if (result)
+ return result;
+
+ // Serialise extent chains if this is an incremental pageset
+ return toi_serialise_extent_chains();
+}
+
+/**
+ * toi_read_init_incremental - incremental image part of setting up to read new section
+ */
+static int toi_read_init_incremental(int stream)
+{
+ int result;
+
+ // Set our position to the start of the next pageset
+ toi_bio_restore_inc_image_ptr(&toi_inc_ptr[stream][1]);
+
+ // Read the start of the next incremental pageset (if any)
+ result = toi_rw_buffer(READ, (char *) &toi_inc_ptr[stream][1], sizeof(toi_inc_ptr[stream][1]), 0);
+
+ if (!result)
+ result = toi_load_extent_chains();
+
+ return result;
+}
+
+/**
+ * toi_rw_init - prepare to read or write a stream in the image
+ * @writing: Whether reading or writing.
+ * @stream number: Section of the image being processed.
+ *
+ * Prepare to read or write a section ('stream') in the image.
+ **/
+static int toi_rw_init(int writing, int stream_number)
+{
+ if (stream_number)
+ toi_extent_state_restore(stream_number);
+ else
+ toi_extent_state_goto_start();
+
+ if (writing) {
+ reset_idx = 0;
+ if (!current_stream)
+ page_idx = 0;
+ } else {
+ reset_idx = 1;
+ }
+
+ atomic_set(&toi_io_done, 0);
+ if (!toi_writer_buffer)
+ toi_writer_buffer = (char *) toi_get_zeroed_page(11,
+ TOI_ATOMIC_GFP);
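+	/* For reads, a posn of PAGE_SIZE makes the first toi_rw_buffer call fetch a page. */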
+ toi_writer_buffer_posn = writing ? 0 : PAGE_SIZE;
+
+ current_stream = stream_number;
+
+ more_readahead = 1;
+
+ if (test_result_state(TOI_KEPT_IMAGE)) {
+ int result;
+
+ if (writing) {
+ result = toi_write_init_incremental(stream_number);
+ } else {
+ result = toi_read_init_incremental(stream_number);
+ }
+
+ if (result)
+ return result;
+ }
+
+ return toi_writer_buffer ? 0 : -ENOMEM;
+}
+
+/**
+ * toi_bio_queue_write - queue a page for writing
+ * @full_buffer: Pointer to a page to be queued
+ *
+ * Add a page to the queue to be submitted. If we're the queue flusher,
+ * we'll do this once we've dropped toi_bio_mutex, so other threads can
+ * continue to submit I/O while we're on the slow path doing the actual
+ * submission.
+ **/
+static void toi_bio_queue_write(char **full_buffer)
+{
+ struct page *page = virt_to_page(*full_buffer);
+ unsigned long flags;
+
+ *full_buffer = NULL;
+ page->private = 0;
+
+ spin_lock_irqsave(&bio_queue_lock, flags);
+ if (!bio_queue_head)
+ bio_queue_head = page;
+ else
+ bio_queue_tail->private = (unsigned long) page;
+
+ bio_queue_tail = page;
+ atomic_inc(&toi_bio_queue_size);
+
+ spin_unlock_irqrestore(&bio_queue_lock, flags);
+ wake_up(&toi_io_queue_flusher);
+}
+
+/**
+ * toi_rw_cleanup - Cleanup after i/o.
+ * @writing: Whether we were reading or writing.
+ *
+ * Flush all I/O and clean everything up after reading or writing a
+ * section of the image.
+ **/
+static int toi_rw_cleanup(int writing)
+{
+ int i, result = 0;
+
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_rw_cleanup.");
+ if (writing) {
+ if (toi_writer_buffer_posn && !test_result_state(TOI_ABORTED))
+ toi_bio_queue_write(&toi_writer_buffer);
+
+ while (bio_queue_head && !result)
+ result = toi_bio_queue_flush_pages(0);
+
+ if (result)
+ return result;
+
+ if (current_stream == 2)
+ toi_extent_state_save(1);
+ else if (current_stream == 1)
+ toi_extent_state_save(3);
+ }
+
+ result = toi_finish_all_io();
+
+ while (readahead_list_head) {
+ void *next = (void *) readahead_list_head->private;
+ toi__free_page(12, readahead_list_head);
+ readahead_list_head = next;
+ }
+
+ readahead_list_tail = NULL;
+
+ if (!current_stream)
+ return result;
+
+ for (i = 0; i < NUM_REASONS; i++) {
+ if (!atomic_read(&reasons[i]))
+ continue;
+ printk(KERN_DEBUG "Waited for i/o due to %s %d times.\n",
+ reason_name[i], atomic_read(&reasons[i]));
+ atomic_set(&reasons[i], 0);
+ }
+
+ current_stream = 0;
+ return result;
+}
+
+/**
+ * toi_start_one_readahead - start one page of readahead
+ * @dedicated_thread: Is this a thread dedicated to doing readahead?
+ *
+ * Start one new page of readahead. If this is being called by a thread
+ * whose only just is to submit readahead, don't quit because we failed
+ * to allocate a page.
+ **/
+static int toi_start_one_readahead(int dedicated_thread)
+{
+ char *buffer = NULL;
+ int oom = 0, result;
+
+ result = throttle_if_needed(dedicated_thread ? THROTTLE_WAIT : 0);
+ if (result) {
+		printk(KERN_DEBUG "toi_start_one_readahead: throttle_if_needed returned %d.\n", result);
+ return result;
+ }
+
+ mutex_lock(&toi_bio_readahead_mutex);
+
+ while (!buffer) {
+ buffer = (char *) toi_get_zeroed_page(12,
+ TOI_ATOMIC_GFP);
+ if (!buffer) {
+ if (oom && !dedicated_thread) {
+ mutex_unlock(&toi_bio_readahead_mutex);
+				printk(KERN_DEBUG "toi_start_one_readahead: out of memory and not a dedicated thread.\n");
+ return -ENOMEM;
+ }
+
+ oom = 1;
+ set_free_mem_throttle();
+ do_bio_wait(5);
+ }
+ }
+
+ result = toi_bio_rw_page(READ, virt_to_page(buffer), 1, 0);
+ if (result) {
+		printk(KERN_DEBUG "toi_start_one_readahead: toi_bio_rw_page returned %d.\n", result);
+ }
+ if (result == -ENOSPC)
+ toi__free_page(12, virt_to_page(buffer));
+ mutex_unlock(&toi_bio_readahead_mutex);
+ if (result) {
+ if (result == -ENOSPC)
+ toi_message(TOI_BIO, TOI_VERBOSE, 0,
+ "Last readahead page submitted.");
+ else
+ printk(KERN_DEBUG "toi_bio_rw_page returned %d.\n",
+ result);
+ }
+ return result;
+}
+
+/**
+ * toi_start_new_readahead - start new readahead
+ * @dedicated_thread: Are we dedicated to this task?
+ *
+ * Start readahead of image pages.
+ *
+ * We can be called as a thread dedicated to this task (may be helpful on
+ * systems with lots of CPUs), in which case we don't exit until there's no
+ * more readahead.
+ *
+ * If this is not called by a dedicated thread, we top up our queue until
+ * there's no more readahead to submit, we've submitted the number given
+ * in target_outstanding_io or the number in progress exceeds the target
+ * outstanding I/O value.
+ *
+ * No mutex needed because this is only ever called by the first cpu.
+ **/
+static int toi_start_new_readahead(int dedicated_thread)
+{
+ int last_result, num_submitted = 0;
+
+ /* Start a new readahead? */
+ if (!more_readahead)
+ return 0;
+
+ do {
+ last_result = toi_start_one_readahead(dedicated_thread);
+
+ if (last_result) {
+ if (last_result == -ENOMEM || last_result == -ENOSPC)
+ return 0;
+
+ printk(KERN_DEBUG
+ "Begin read chunk returned %d.\n",
+ last_result);
+ } else
+ num_submitted++;
+
+ } while (more_readahead && !last_result &&
+ (dedicated_thread ||
+ (num_submitted < target_outstanding_io &&
+ atomic_read(&toi_io_in_progress) < target_outstanding_io)));
+
+ return last_result;
+}
+
+/**
+ * bio_io_flusher - start the dedicated I/O flushing routine
+ * @writing: Whether we're writing the image.
+ **/
+static int bio_io_flusher(int writing)
+{
+
+ if (writing)
+ return toi_bio_queue_flush_pages(1);
+ else
+ return toi_start_new_readahead(1);
+}
+
+/**
+ * toi_bio_get_next_page_read - read a disk page, perhaps with readahead
+ * @no_readahead: Whether to avoid submitting readahead
+ *
+ * Read a page from disk, submitting readahead and cleaning up finished i/o
+ * while we wait for the page we're after.
+ **/
+static int toi_bio_get_next_page_read(int no_readahead)
+{
+ char *virt;
+ struct page *old_readahead_list_head;
+
+ /*
+ * When reading the second page of the header, we have to
+ * delay submitting the read until after we've gotten the
+ * extents out of the first page.
+ */
+ if (unlikely(no_readahead)) {
+ int result = toi_start_one_readahead(0);
+ if (result) {
+ printk(KERN_EMERG "No readahead and toi_start_one_readahead "
+ "returned non-zero.\n");
+ return -EIO;
+ }
+ }
+
+ if (unlikely(!readahead_list_head)) {
+ /*
+ * If the last page finishes exactly on the page
+ * boundary, we will be called one extra time and
+ * have no data to return. In this case, we should
+ * not BUG(), like we used to!
+ */
+ if (!more_readahead) {
+ printk(KERN_EMERG "No more readahead.\n");
+ return -ENOSPC;
+ }
+ if (unlikely(toi_start_one_readahead(0))) {
+ printk(KERN_EMERG "No readahead and "
+ "toi_start_one_readahead returned non-zero.\n");
+ return -EIO;
+ }
+ }
+
+ if (PageLocked(readahead_list_head)) {
+ waiting_on = readahead_list_head;
+ do_bio_wait(0);
+ }
+
+ virt = page_address(readahead_list_head);
+ memcpy(toi_writer_buffer, virt, PAGE_SIZE);
+
+ mutex_lock(&toi_bio_readahead_mutex);
+ old_readahead_list_head = readahead_list_head;
+ readahead_list_head = (struct page *) readahead_list_head->private;
+ mutex_unlock(&toi_bio_readahead_mutex);
+ toi__free_page(12, old_readahead_list_head);
+ return 0;
+}
+
+/**
+ * toi_bio_queue_flush_pages - flush the queue of pages queued for writing
+ * @dedicated_thread: Whether we're a dedicated thread
+ *
+ * Flush the queue of pages ready to be written to disk.
+ *
+ * If we're a dedicated thread, stay in here until told to leave,
+ * sleeping in wait_event.
+ *
+ * The first thread is normally the only one to come in here. Another
+ * thread can enter this routine too, though, via throttle_if_needed.
+ * Since that's the case, we must be careful to only have one thread
+ * doing this work at a time. Otherwise we have a race and could save
+ * pages out of order.
+ *
+ * If an error occurs, free all remaining pages without submitting them
+ * for I/O.
+ **/
+
+int toi_bio_queue_flush_pages(int dedicated_thread)
+{
+ unsigned long flags;
+ int result = 0;
+ static DEFINE_MUTEX(busy);
+
+ if (!mutex_trylock(&busy))
+ return 0;
+
+top:
+ spin_lock_irqsave(&bio_queue_lock, flags);
+ while (bio_queue_head) {
+ struct page *page = bio_queue_head;
+ bio_queue_head = (struct page *) page->private;
+ if (bio_queue_tail == page)
+ bio_queue_tail = NULL;
+ atomic_dec(&toi_bio_queue_size);
+ spin_unlock_irqrestore(&bio_queue_lock, flags);
+
+ /* Don't generate more error messages if already had one */
+ if (!result)
+ result = toi_bio_rw_page(WRITE, page, 0, 11);
+ /*
+ * If writing the page failed, don't drop out.
+ * Flush the rest of the queue too.
+ */
+ if (result)
+ toi__free_page(11 , page);
+ spin_lock_irqsave(&bio_queue_lock, flags);
+ }
+ spin_unlock_irqrestore(&bio_queue_lock, flags);
+
+ if (dedicated_thread) {
+ wait_event(toi_io_queue_flusher, bio_queue_head ||
+ toi_bio_queue_flusher_should_finish);
+ if (likely(!toi_bio_queue_flusher_should_finish))
+ goto top;
+ toi_bio_queue_flusher_should_finish = 0;
+ }
+
+ mutex_unlock(&busy);
+ return result;
+}
+
+/**
+ * toi_bio_get_new_page - get a new page for I/O
+ * @full_buffer: Pointer to a page to allocate.
+ **/
+static int toi_bio_get_new_page(char **full_buffer)
+{
+ int result = throttle_if_needed(THROTTLE_WAIT);
+ if (result)
+ return result;
+
+ while (!*full_buffer) {
+ *full_buffer = (char *) toi_get_zeroed_page(11, TOI_ATOMIC_GFP);
+ if (!*full_buffer) {
+ set_free_mem_throttle();
+ do_bio_wait(3);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * toi_rw_buffer - combine smaller buffers into PAGE_SIZE I/O
+ * @writing: Bool - whether writing (or reading).
+ * @buffer: The start of the buffer to write or fill.
+ * @buffer_size: The size of the buffer to write or fill.
+ * @no_readahead: Don't try to start readahead (used when getting extents).
+ **/
+static int toi_rw_buffer(int writing, char *buffer, int buffer_size,
+ int no_readahead)
+{
+ int bytes_left = buffer_size, result = 0;
+
+ while (bytes_left) {
+ char *source_start = buffer + buffer_size - bytes_left;
+ char *dest_start = toi_writer_buffer + toi_writer_buffer_posn;
+ int capacity = PAGE_SIZE - toi_writer_buffer_posn;
+ char *to = writing ? dest_start : source_start;
+ char *from = writing ? source_start : dest_start;
+
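+		/* If the remainder fits in the current buffer page, copy it and we're done. */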
+ if (bytes_left <= capacity) {
+ memcpy(to, from, bytes_left);
+ toi_writer_buffer_posn += bytes_left;
+ return 0;
+ }
+
+ /* Complete this page and start a new one */
+ memcpy(to, from, capacity);
+ bytes_left -= capacity;
+
+ if (!writing) {
+ /*
+ * Perform actual I/O:
+ * read readahead_list_head into toi_writer_buffer
+ */
+ int result = toi_bio_get_next_page_read(no_readahead);
+ if (result && bytes_left) {
+				printk(KERN_ERR "toi_bio_get_next_page_read "
+					"returned %d. Expected to read %d more bytes.\n", result, bytes_left);
+ return result;
+ }
+ } else {
+ toi_bio_queue_write(&toi_writer_buffer);
+ result = toi_bio_get_new_page(&toi_writer_buffer);
+ if (result) {
+ printk(KERN_ERR "toi_bio_get_new_page returned "
+ "%d.\n", result);
+ return result;
+ }
+ }
+
+ toi_writer_buffer_posn = 0;
+ toi_cond_pause(0, NULL);
+ }
+
+ return 0;
+}
+
+/**
+ * toi_bio_read_page - read a page of the image
+ * @pfn: The pfn where the data belongs.
+ * @buffer_page: The page containing the (possibly compressed) data.
+ * @buf_size: The number of bytes on @buffer_page used (PAGE_SIZE).
+ *
+ * Read a (possibly compressed) page from the image, into buffer_page,
+ * returning its pfn and the buffer size.
+ **/
+static int toi_bio_read_page(unsigned long *pfn, int buf_type,
+ void *buffer_page, unsigned int *buf_size)
+{
+ int result = 0;
+ int this_idx;
+ char *buffer_virt = TOI_MAP(buf_type, buffer_page);
+
+ /*
+ * Only call start_new_readahead if we don't have a dedicated thread
+ * and we're the queue flusher.
+ */
+ if (current == toi_queue_flusher && more_readahead &&
+ !test_action_state(TOI_NO_READAHEAD)) {
+ int result2 = toi_start_new_readahead(0);
+ if (result2) {
+ printk(KERN_DEBUG "Queue flusher and "
+ "toi_start_one_readahead returned non-zero.\n");
+ result = -EIO;
+ goto out;
+ }
+ }
+
+ my_mutex_lock(0, &toi_bio_mutex);
+
+ /*
+ * Structure in the image:
+ * [destination pfn|page size|page data]
+ * buf_size is PAGE_SIZE
+ * We can validly find there's nothing to read in a multithreaded
+ * situation.
+ */
+ if (toi_rw_buffer(READ, (char *) &this_idx, sizeof(int), 0) ||
+ toi_rw_buffer(READ, (char *) pfn, sizeof(unsigned long), 0) ||
+ toi_rw_buffer(READ, (char *) buf_size, sizeof(int), 0) ||
+ toi_rw_buffer(READ, buffer_virt, *buf_size, 0)) {
+ result = -ENODATA;
+ goto out_unlock;
+ }
+
+ if (reset_idx) {
+ page_idx = this_idx;
+ reset_idx = 0;
+ } else {
+ page_idx++;
+ if (!this_idx)
+ result = -ENODATA;
+ else if (page_idx != this_idx)
+ printk(KERN_ERR "Got page index %d, expected %d.\n",
+ this_idx, page_idx);
+ }
+
+out_unlock:
+ my_mutex_unlock(0, &toi_bio_mutex);
+out:
+ TOI_UNMAP(buf_type, buffer_page);
+ return result;
+}
+
+/**
+ * toi_bio_write_page - write a page of the image
+ * @pfn: The pfn where the data belongs.
+ * @buffer_page: The page containing the (possibly compressed) data.
+ * @buf_size: The number of bytes on @buffer_page used.
+ *
+ * Write a (possibly compressed) page to the image from the buffer, together
+ * with its index and buffer size.
+ **/
+static int toi_bio_write_page(unsigned long pfn, int buf_type,
+ void *buffer_page, unsigned int buf_size)
+{
+ char *buffer_virt;
+ int result = 0, result2 = 0;
+
+ if (unlikely(test_action_state(TOI_TEST_FILTER_SPEED)))
+ return 0;
+
+ my_mutex_lock(1, &toi_bio_mutex);
+
+ if (test_result_state(TOI_ABORTED)) {
+ my_mutex_unlock(1, &toi_bio_mutex);
+ return 0;
+ }
+
+ buffer_virt = TOI_MAP(buf_type, buffer_page);
+ page_idx++;
+
+ /*
+ * Structure in the image:
+ * [destination pfn|page size|page data]
+ * buf_size is PAGE_SIZE
+ */
+ if (toi_rw_buffer(WRITE, (char *) &page_idx, sizeof(int), 0) ||
+ toi_rw_buffer(WRITE, (char *) &pfn, sizeof(unsigned long), 0) ||
+ toi_rw_buffer(WRITE, (char *) &buf_size, sizeof(int), 0) ||
+ toi_rw_buffer(WRITE, buffer_virt, buf_size, 0)) {
+ printk(KERN_DEBUG "toi_rw_buffer returned non-zero to "
+ "toi_bio_write_page.\n");
+ result = -EIO;
+ }
+
+ TOI_UNMAP(buf_type, buffer_page);
+ my_mutex_unlock(1, &toi_bio_mutex);
+
+ if (current == toi_queue_flusher)
+ result2 = toi_bio_queue_flush_pages(0);
+
+ return result ? result : result2;
+}
+
+/**
+ * _toi_rw_header_chunk - read or write a portion of the image header
+ * @writing: Whether reading or writing.
+ * @owner: The module for which we're writing.
+ * Used for confirming that modules
+ * don't use more header space than they asked for.
+ * @buffer: Address of the data to write.
+ * @buffer_size: Size of the data buffer.
+ * @no_readahead: Don't try to start readahead (when getting extents).
+ *
+ * Perform PAGE_SIZE I/O. Start readahead if needed.
+ **/
+static int _toi_rw_header_chunk(int writing, struct toi_module_ops *owner,
+ char *buffer, int buffer_size, int no_readahead)
+{
+ int result = 0;
+
+ if (owner) {
+ owner->header_used += buffer_size;
+ toi_message(TOI_HEADER, TOI_LOW, 1,
+ "Header: %s : %d bytes (%d/%d) from offset %d.",
+ owner->name,
+ buffer_size, owner->header_used,
+ owner->header_requested,
+ toi_writer_buffer_posn);
+ if (owner->header_used > owner->header_requested && writing) {
+ printk(KERN_EMERG "TuxOnIce module %s is using more "
+ "header space (%u) than it requested (%u).\n",
+ owner->name,
+ owner->header_used,
+ owner->header_requested);
+ return buffer_size;
+ }
+ } else {
+ unowned += buffer_size;
+ toi_message(TOI_HEADER, TOI_LOW, 1,
+ "Header: (No owner): %d bytes (%d total so far) from "
+ "offset %d.", buffer_size, unowned,
+ toi_writer_buffer_posn);
+ }
+
+ if (!writing && !no_readahead && more_readahead) {
+ result = toi_start_new_readahead(0);
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Start new readahead "
+ "returned %d.", result);
+ }
+
+ if (!result) {
+ result = toi_rw_buffer(writing, buffer, buffer_size,
+ no_readahead);
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "rw_buffer returned "
+ "%d.", result);
+ }
+
+ total_header_bytes += buffer_size;
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "_toi_rw_header_chunk returning "
+ "%d.", result);
+ return result;
+}
+
+static int toi_rw_header_chunk(int writing, struct toi_module_ops *owner,
+ char *buffer, int size)
+{
+ return _toi_rw_header_chunk(writing, owner, buffer, size, 1);
+}
+
+static int toi_rw_header_chunk_noreadahead(int writing,
+ struct toi_module_ops *owner, char *buffer, int size)
+{
+ return _toi_rw_header_chunk(writing, owner, buffer, size, 1);
+}
+
+/**
+ * toi_bio_storage_needed - get the amount of storage needed for my fns
+ **/
+static int toi_bio_storage_needed(void)
+{
+ return sizeof(int) + PAGE_SIZE + toi_bio_devinfo_storage_needed();
+}
+
+/**
+ * toi_bio_save_config_info - save block I/O config to image header
+ * @buf: PAGE_SIZE'd buffer into which data should be saved.
+ **/
+static int toi_bio_save_config_info(char *buf)
+{
+ int *ints = (int *) buf;
+ ints[0] = target_outstanding_io;
+ return sizeof(int);
+}
+
+/**
+ * toi_bio_load_config_info - restore block I/O config
+ * @buf: Data to be reloaded.
+ * @size: Size of the buffer saved.
+ **/
+static void toi_bio_load_config_info(char *buf, int size)
+{
+ int *ints = (int *) buf;
+ target_outstanding_io = ints[0];
+}
+
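+/*
+ * open_resume_dev_t()/close_resume_dev_t() maintain a simple reference count
+ * (resume_bdev_open_count) on the resume block device so that nested users
+ * share a single bdev handle; a forced call ignores the existing count,
+ * closing or (re)opening the device unconditionally.
+ */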
+void close_resume_dev_t(int force)
+{
+ if (!resume_block_device)
+ return;
+
+ if (force)
+ atomic_set(&resume_bdev_open_count, 0);
+ else
+ atomic_dec(&resume_bdev_open_count);
+
+ if (!atomic_read(&resume_bdev_open_count)) {
+ toi_close_bdev(resume_block_device);
+ resume_block_device = NULL;
+ }
+}
+
+int open_resume_dev_t(int force, int quiet)
+{
+ if (force) {
+ close_resume_dev_t(1);
+ atomic_set(&resume_bdev_open_count, 1);
+ } else
+ atomic_inc(&resume_bdev_open_count);
+
+ if (resume_block_device)
+ return 0;
+
+ resume_block_device = toi_open_bdev(NULL, resume_dev_t, 0);
+ if (IS_ERR(resume_block_device)) {
+ if (!quiet)
+ toi_early_boot_message(1, TOI_CONTINUE_REQ,
+ "Failed to open device %x, where"
+ " the header should be found.",
+ resume_dev_t);
+ resume_block_device = NULL;
+ atomic_set(&resume_bdev_open_count, 0);
+ return 1;
+ }
+
+ return 0;
+}
+
+/**
+ * toi_bio_initialise - initialise bio code at start of some action
+ * @starting_cycle: Whether starting a hibernation cycle, or just reading or
+ * writing a sysfs value.
+ **/
+static int toi_bio_initialise(int starting_cycle)
+{
+ int result;
+
+ if (!starting_cycle || !resume_dev_t)
+ return 0;
+
+ max_outstanding_writes = 0;
+ max_outstanding_reads = 0;
+ current_stream = 0;
+ toi_queue_flusher = current;
+#ifdef MEASURE_MUTEX_CONTENTION
+ {
+ int i, j, k;
+
+ for (i = 0; i < 2; i++)
+ for (j = 0; j < 2; j++)
+ for_each_online_cpu(k)
+ mutex_times[i][j][k] = 0;
+ }
+#endif
+ result = open_resume_dev_t(0, 1);
+
+ if (result)
+ return result;
+
+ return get_signature_page();
+}
+
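+/*
+ * raw_to_real - rough estimate of how many data pages fit into @raw pages of
+ * storage, once sizeof(unsigned long) + sizeof(int) bytes of per-page
+ * metadata are accounted for. As an illustration, with 4096-byte pages,
+ * 8-byte longs and 4-byte ints that is 12 bytes per stored page, so well
+ * under 1% of the raw capacity is lost to overhead.
+ */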
+static unsigned long raw_to_real(unsigned long raw)
+{
+ unsigned long extra;
+
+ extra = (raw * (sizeof(unsigned long) + sizeof(int)) +
+ (PAGE_SIZE + sizeof(unsigned long) + sizeof(int) + 1)) /
+ (PAGE_SIZE + sizeof(unsigned long) + sizeof(int));
+
+ return raw > extra ? raw - extra : 0;
+}
+
+static unsigned long toi_bio_storage_available(void)
+{
+ unsigned long sum = 0;
+ struct toi_module_ops *this_module;
+
+ list_for_each_entry(this_module, &toi_modules, module_list) {
+ if (!this_module->enabled ||
+ this_module->type != BIO_ALLOCATOR_MODULE)
+ continue;
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Seeking storage "
+ "available from %s.", this_module->name);
+ sum += this_module->bio_allocator_ops->storage_available();
+ }
+
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Total storage available is %lu "
+			"pages (%lu header pages).", sum, header_pages_reserved);
+
+ return sum > header_pages_reserved ?
+ raw_to_real(sum - header_pages_reserved) : 0;
+
+}
+
+static unsigned long toi_bio_storage_allocated(void)
+{
+ return raw_pages_allocd > header_pages_reserved ?
+ raw_to_real(raw_pages_allocd - header_pages_reserved) : 0;
+}
+
+/*
+ * If we have read part of the image, we might have filled memory with
+ * data that should be zeroed out.
+ */
+static void toi_bio_noresume_reset(void)
+{
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_noresume_reset.");
+ toi_rw_cleanup(READ);
+ free_all_bdev_info();
+}
+
+/**
+ * toi_bio_cleanup - cleanup after some action
+ * @finishing_cycle: Whether completing a cycle.
+ **/
+static void toi_bio_cleanup(int finishing_cycle)
+{
+ if (!finishing_cycle)
+ return;
+
+ if (toi_writer_buffer) {
+ toi_free_page(11, (unsigned long) toi_writer_buffer);
+ toi_writer_buffer = NULL;
+ }
+
+ forget_signature_page();
+
+ if (header_block_device && toi_sig_data &&
+ toi_sig_data->header_dev_t != resume_dev_t)
+ toi_close_bdev(header_block_device);
+
+ header_block_device = NULL;
+
+ close_resume_dev_t(0);
+}
+
+static int toi_bio_write_header_init(void)
+{
+ int result;
+
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_write_header_init");
+ toi_rw_init(WRITE, 0);
+ toi_writer_buffer_posn = 0;
+
+ /* Info needed to bootstrap goes at the start of the header.
+ * First we save the positions and devinfo, including the number
+ * of header pages. Then we save the structs containing data needed
+ * for reading the header pages back.
+ * Note that even if header pages take more than one page, when we
+ * read back the info, we will have restored the location of the
+ * next header page by the time we go to use it.
+ */
+
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "serialise extent chains.");
+ result = toi_serialise_extent_chains();
+
+ if (result)
+ return result;
+
+ /*
+ * Signature page hasn't been modified at this point. Write it in
+ * the header so we can restore it later.
+ */
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "serialise signature page.");
+ return toi_rw_header_chunk_noreadahead(WRITE, &toi_blockwriter_ops,
+ (char *) toi_cur_sig_page,
+ PAGE_SIZE);
+}
+
+static int toi_bio_write_header_cleanup(void)
+{
+ int result = 0;
+
+ if (toi_writer_buffer_posn)
+ toi_bio_queue_write(&toi_writer_buffer);
+
+ result = toi_finish_all_io();
+
+ unowned = 0;
+ total_header_bytes = 0;
+
+	/* Set signature to say we have an image */
+ if (!result)
+ result = toi_bio_mark_have_image();
+
+ return result;
+}
+
+/*
+ * toi_bio_read_header_init()
+ *
+ * Description:
+ * 1. Attempt to read the device specified with resume=.
+ * 2. Check the contents of the swap header for our signature.
+ * 3. Warn, ignore, reset and/or continue as appropriate.
+ * 4. If continuing, read the toi_swap configuration section
+ * of the header and set up block device info so we can read
+ * the rest of the header & image.
+ *
+ * Returns:
+ *	May not return if the user chooses to reboot at a warning.
+ *	-EINVAL if we cannot resume at this time. Booting should continue
+ * normally.
+ */
+
+static int toi_bio_read_header_init(void)
+{
+ int result = 0;
+ char buf[32];
+
+ toi_writer_buffer_posn = 0;
+
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_read_header_init");
+
+ if (!toi_sig_data) {
+ printk(KERN_INFO "toi_bio_read_header_init called when we "
+ "haven't verified there is an image!\n");
+ return -EINVAL;
+ }
+
+ /*
+ * If the header is not on the resume_swap_dev_t, get the resume device
+ * first.
+ */
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "Header dev_t is %lx.",
+ toi_sig_data->header_dev_t);
+ if (toi_sig_data->have_uuid) {
+ struct fs_info seek;
+ dev_t device;
+
+ strncpy((char *) seek.uuid, toi_sig_data->header_uuid, 16);
+ seek.dev_t = toi_sig_data->header_dev_t;
+ seek.last_mount_size = 0;
+ device = blk_lookup_fs_info(&seek);
+ if (device) {
+ printk("Using dev_t %s, returned by blk_lookup_fs_info.\n",
+ format_dev_t(buf, device));
+ toi_sig_data->header_dev_t = device;
+ }
+ }
+ if (toi_sig_data->header_dev_t != resume_dev_t) {
+ header_block_device = toi_open_bdev(NULL,
+ toi_sig_data->header_dev_t, 1);
+
+ if (IS_ERR(header_block_device))
+ return PTR_ERR(header_block_device);
+ } else
+ header_block_device = resume_block_device;
+
+ if (!toi_writer_buffer)
+ toi_writer_buffer = (char *) toi_get_zeroed_page(11,
+ TOI_ATOMIC_GFP);
+ more_readahead = 1;
+
+ /*
+ * Read toi_swap configuration.
+ * Headerblock size taken into account already.
+ */
+ result = toi_bio_ops.bdev_page_io(READ, header_block_device,
+ toi_sig_data->first_header_block,
+ virt_to_page((unsigned long) toi_writer_buffer));
+ if (result)
+ return result;
+
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "load extent chains.");
+ result = toi_load_extent_chains();
+
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "load original signature page.");
+ toi_orig_sig_page = (char *) toi_get_zeroed_page(38, TOI_ATOMIC_GFP);
+ if (!toi_orig_sig_page) {
+ printk(KERN_ERR "Failed to allocate memory for the current"
+ " image signature.\n");
+ return -ENOMEM;
+ }
+
+ return toi_rw_header_chunk_noreadahead(READ, &toi_blockwriter_ops,
+ (char *) toi_orig_sig_page,
+ PAGE_SIZE);
+}
+
+static int toi_bio_read_header_cleanup(void)
+{
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_read_header_cleanup.");
+ return toi_rw_cleanup(READ);
+}
+
+/* Works only for digits and letters, but small and fast */
+#define TOLOWER(x) ((x) | 0x20)
+
+/*
+ * UUID must be 32 chars long. It may have dashes, but nothing
+ * else.
+ */
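+/*
+ * For example (illustrative value), "UUID=0123456789abcdef0123456789abcdef",
+ * with or without dashes, is packed into the returned 16-byte buffer;
+ * anything else makes us return NULL.
+ */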
+char *uuid_from_commandline(char *commandline)
+{
+ int low = 0;
+ char *result = NULL, *output, *ptr;
+
+ if (strncmp(commandline, "UUID=", 5))
+ return NULL;
+
+ result = kzalloc(17, GFP_KERNEL);
+ if (!result) {
+ printk("Failed to kzalloc UUID text memory.\n");
+ return NULL;
+ }
+
+ ptr = commandline + 5;
+ output = result;
+
+ while (*ptr && (output - result) < 16) {
+ if (isxdigit(*ptr)) {
+ int value = isdigit(*ptr) ? *ptr - '0' :
+ TOLOWER(*ptr) - 'a' + 10;
+ if (low) {
+ *output += value;
+ output++;
+ } else {
+ *output = value << 4;
+ }
+ low = !low;
+ } else if (*ptr != '-')
+ break;
+ ptr++;
+ }
+
+ if ((output - result) < 16 || *ptr) {
+ printk(KERN_DEBUG "Found resume=UUID=, but the value looks "
+ "invalid.\n");
+ kfree(result);
+ result = NULL;
+ }
+
+ return result;
+}
+
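+/*
+ * Run @command; if resume_dev_t is still unset and we have not yet waited
+ * for device probing to finish, call wait_for_device_probe() and retry the
+ * command once.
+ */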
+#define retry_if_fails(command) \
+do { \
+ command; \
+ if (!resume_dev_t && !waited_for_device_probe) { \
+ wait_for_device_probe(); \
+ command; \
+ waited_for_device_probe = 1; \
+ } \
+} while(0)
+
+/**
+ * try_to_open_resume_device: Try to parse and open resume=
+ *
+ * Any "swap:" has been stripped away and we just have the path to deal with.
+ * We attempt to do name_to_dev_t, open and stat the file. Having opened the
+ * file, get the struct block_device * to match.
+ */
+static int try_to_open_resume_device(char *commandline, int quiet)
+{
+ struct kstat stat;
+ int error = 0;
+ char *uuid = uuid_from_commandline(commandline);
+ int waited_for_device_probe = 0;
+
+ resume_dev_t = MKDEV(0, 0);
+
+ if (!strlen(commandline))
+ retry_if_fails(toi_bio_scan_for_image(quiet));
+
+ if (uuid) {
+ struct fs_info seek;
+ strncpy((char *) &seek.uuid, uuid, 16);
+ seek.dev_t = resume_dev_t;
+ seek.last_mount_size = 0;
+ retry_if_fails(resume_dev_t = blk_lookup_fs_info(&seek));
+ kfree(uuid);
+ }
+
+ if (!resume_dev_t)
+ retry_if_fails(resume_dev_t = name_to_dev_t(commandline));
+
+ if (!resume_dev_t) {
+ struct file *file = filp_open(commandline,
+ O_RDONLY|O_LARGEFILE, 0);
+
+ if (!IS_ERR(file) && file) {
+ vfs_getattr(&file->f_path, &stat);
+ filp_close(file, NULL);
+ } else
+ error = vfs_stat(commandline, &stat);
+ if (!error)
+ resume_dev_t = stat.rdev;
+ }
+
+ if (!resume_dev_t) {
+ if (quiet)
+ return 1;
+
+ if (test_toi_state(TOI_TRYING_TO_RESUME))
+ toi_early_boot_message(1, toi_translate_err_default,
+ "Failed to translate \"%s\" into a device id.\n",
+ commandline);
+ else
+ printk("TuxOnIce: Can't translate \"%s\" into a device "
+ "id yet.\n", commandline);
+ return 1;
+ }
+
+ return open_resume_dev_t(1, quiet);
+}
+
+/*
+ * Parse Image Location
+ *
+ * Attempt to parse a resume= parameter.
+ * Swap Writer accepts:
+ * resume=[swap:|file:]DEVNAME[:FIRSTBLOCK][@BLOCKSIZE]
+ *
+ * Where:
+ * DEVNAME is convertible to a dev_t by name_to_dev_t
+ * FIRSTBLOCK is the location of the first block in the swap file
+ * (specifying for a swap partition is nonsensical but not prohibited).
+ * Data is validated by attempting to read a swap header from the
+ * location given. Failure will result in toi_swap refusing to
+ * save an image, and a reboot with correct parameters will be
+ * necessary.
+ */
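+/*
+ * Illustrative examples (device names are placeholders):
+ *	resume=swap:/dev/sda2
+ *	resume=/dev/sda2:0x800
+ *	resume=UUID=<32 hex digits>
+ * An empty value makes us scan the available swap devices for an image.
+ */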
+static int toi_bio_parse_sig_location(char *commandline,
+ int only_allocator, int quiet)
+{
+ char *thischar, *devstart, *colon = NULL;
+ int signature_found, result = -EINVAL, temp_result = 0;
+
+ if (strncmp(commandline, "swap:", 5) &&
+ strncmp(commandline, "file:", 5)) {
+ /*
+ * Failing swap:, we'll take a simple resume=/dev/hda2, or a
+ * blank value (scan) but fall through to other allocators
+ * if /dev/ or UUID= isn't matched.
+ */
+ if (strncmp(commandline, "/dev/", 5) &&
+ strncmp(commandline, "UUID=", 5) &&
+ strlen(commandline))
+ return 1;
+ } else
+ commandline += 5;
+
+ devstart = commandline;
+ thischar = commandline;
+ while ((*thischar != ':') && (*thischar != '@') &&
+ ((thischar - commandline) < 250) && (*thischar))
+ thischar++;
+
+ if (*thischar == ':') {
+ colon = thischar;
+ *colon = 0;
+ thischar++;
+ }
+
+ while ((thischar - commandline) < 250 && *thischar)
+ thischar++;
+
+ if (colon) {
+ unsigned long block;
+ temp_result = kstrtoul(colon + 1, 0, &block);
+ if (!temp_result)
+ resume_firstblock = (int) block;
+ } else
+ resume_firstblock = 0;
+
+ clear_toi_state(TOI_CAN_HIBERNATE);
+ clear_toi_state(TOI_CAN_RESUME);
+
+ if (!temp_result)
+ temp_result = try_to_open_resume_device(devstart, quiet);
+
+ if (colon)
+ *colon = ':';
+
+ /* No error if we only scanned */
+ if (temp_result)
+ return strlen(commandline) ? -EINVAL : 1;
+
+ signature_found = toi_bio_image_exists(quiet);
+
+ if (signature_found != -1) {
+ result = 0;
+ /*
+ * TODO: If only file storage, CAN_HIBERNATE should only be
+ * set if file allocator's target is valid.
+ */
+ set_toi_state(TOI_CAN_HIBERNATE);
+ set_toi_state(TOI_CAN_RESUME);
+ } else
+ if (!quiet)
+ printk(KERN_ERR "TuxOnIce: Block I/O: No "
+ "signature found at %s.\n", devstart);
+
+ return result;
+}
+
+static void toi_bio_release_storage(void)
+{
+ header_pages_reserved = 0;
+ raw_pages_allocd = 0;
+
+ free_all_bdev_info();
+}
+
+/* toi_bio_remove_image
+ *
+ * Restore the original signature and release the storage used by the image.
+ */
+static int toi_bio_remove_image(void)
+{
+ int result;
+
+ toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_remove_image.");
+
+ result = toi_bio_restore_original_signature();
+
+ /*
+	 * We don't do a sanity check here: we want to restore the swap
+	 * signature whatever version of the kernel made the hibernate image.
+	 *
+	 * We need to write swap, but swap may not be enabled, so
+	 * we write the device directly.
+	 *
+	 * If we don't have a current_signature_page, we didn't
+ * read an image header, so don't change anything.
+ */
+
+ toi_bio_release_storage();
+
+ return result;
+}
+
+struct toi_bio_ops toi_bio_ops = {
+ .bdev_page_io = toi_bdev_page_io,
+ .register_storage = toi_register_storage_chain,
+ .free_storage = toi_bio_release_storage,
+};
+
+static struct toi_sysfs_data sysfs_params[] = {
+ SYSFS_INT("target_outstanding_io", SYSFS_RW, &target_outstanding_io,
+ 0, 16384, 0, NULL),
+};
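+
+/*
+ * Typically exposed as /sys/power/tuxonice/block_io/target_outstanding_io
+ * (valid range 0-16384), e.g. (illustrative)
+ *	echo 1024 > /sys/power/tuxonice/block_io/target_outstanding_io
+ */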
+
+struct toi_module_ops toi_blockwriter_ops = {
+ .type = WRITER_MODULE,
+ .name = "block i/o",
+ .directory = "block_io",
+ .module = THIS_MODULE,
+ .memory_needed = toi_bio_memory_needed,
+ .print_debug_info = toi_bio_print_debug_stats,
+ .storage_needed = toi_bio_storage_needed,
+ .save_config_info = toi_bio_save_config_info,
+ .load_config_info = toi_bio_load_config_info,
+ .initialise = toi_bio_initialise,
+ .cleanup = toi_bio_cleanup,
+ .post_atomic_restore = toi_bio_chains_post_atomic,
+
+ .rw_init = toi_rw_init,
+ .rw_cleanup = toi_rw_cleanup,
+ .read_page = toi_bio_read_page,
+ .write_page = toi_bio_write_page,
+ .rw_header_chunk = toi_rw_header_chunk,
+ .rw_header_chunk_noreadahead = toi_rw_header_chunk_noreadahead,
+ .io_flusher = bio_io_flusher,
+ .update_throughput_throttle = update_throughput_throttle,
+ .finish_all_io = toi_finish_all_io,
+
+ .noresume_reset = toi_bio_noresume_reset,
+ .storage_available = toi_bio_storage_available,
+ .storage_allocated = toi_bio_storage_allocated,
+ .reserve_header_space = toi_bio_reserve_header_space,
+ .allocate_storage = toi_bio_allocate_storage,
+ .free_unused_storage = toi_bio_free_unused_storage,
+ .image_exists = toi_bio_image_exists,
+ .mark_resume_attempted = toi_bio_mark_resume_attempted,
+ .write_header_init = toi_bio_write_header_init,
+ .write_header_cleanup = toi_bio_write_header_cleanup,
+ .read_header_init = toi_bio_read_header_init,
+ .read_header_cleanup = toi_bio_read_header_cleanup,
+ .get_header_version = toi_bio_get_header_version,
+ .remove_image = toi_bio_remove_image,
+ .parse_sig_location = toi_bio_parse_sig_location,
+
+ .sysfs_data = sysfs_params,
+ .num_sysfs_entries = sizeof(sysfs_params) /
+ sizeof(struct toi_sysfs_data),
+};
+
+/**
+ * toi_block_io_load - load time routine for block I/O module
+ *
+ * Register block i/o ops and sysfs entries.
+ **/
+static __init int toi_block_io_load(void)
+{
+ return toi_register_module(&toi_blockwriter_ops);
+}
+
+late_initcall(toi_block_io_load);
diff --git a/kernel/power/tuxonice_bio_internal.h b/kernel/power/tuxonice_bio_internal.h
new file mode 100644
index 000000000..cf9211ed9
--- /dev/null
+++ b/kernel/power/tuxonice_bio_internal.h
@@ -0,0 +1,101 @@
+/*
+ * kernel/power/tuxonice_bio_internal.h
+ *
+ * Copyright (C) 2009-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ * This file contains declarations for functions exported from
+ * tuxonice_bio.c, which contains low level io functions.
+ */
+
+/* Extent chains */
+void toi_extent_state_goto_start(void);
+void toi_extent_state_save(int slot);
+int go_next_page(int writing, int section_barrier);
+void toi_extent_state_restore(int slot);
+void free_all_bdev_info(void);
+int devices_of_same_priority(struct toi_bdev_info *this);
+int toi_register_storage_chain(struct toi_bdev_info *new);
+int toi_serialise_extent_chains(void);
+int toi_load_extent_chains(void);
+int toi_bio_rw_page(int writing, struct page *page, int is_readahead,
+ int free_group);
+int toi_bio_restore_original_signature(void);
+int toi_bio_devinfo_storage_needed(void);
+unsigned long get_headerblock(void);
+dev_t get_header_dev_t(void);
+struct block_device *get_header_bdev(void);
+int toi_bio_allocate_storage(unsigned long request);
+void toi_bio_free_unused_storage(void);
+
+/* Signature functions */
+#define HaveImage "HaveImage"
+#define NoImage "TuxOnIce"
+#define sig_size (sizeof(HaveImage))
+
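+/*
+ * Layout of the TuxOnIce signature data kept in the first block of the
+ * resume device: it records whether an image exists, whether a resume has
+ * already been attempted, and which device/block holds the image header.
+ */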
+struct sig_data {
+ char sig[sig_size];
+ int have_image;
+ int resumed_before;
+
+ char have_uuid;
+ char header_uuid[17];
+ dev_t header_dev_t;
+ unsigned long first_header_block;
+
+ /* Repeat the signature to be sure we have a header version */
+ char sig2[sig_size];
+ int header_version;
+};
+
+void forget_signature_page(void);
+int toi_check_for_signature(void);
+int toi_bio_image_exists(int quiet);
+int get_signature_page(void);
+int toi_bio_mark_resume_attempted(int);
+extern char *toi_cur_sig_page;
+extern char *toi_orig_sig_page;
+int toi_bio_mark_have_image(void);
+extern struct sig_data *toi_sig_data;
+extern dev_t resume_dev_t;
+extern struct block_device *resume_block_device;
+extern struct block_device *header_block_device;
+extern unsigned long resume_firstblock;
+
+struct block_device *open_bdev(dev_t device, int display_errs);
+extern int current_stream;
+extern int more_readahead;
+int toi_do_io(int writing, struct block_device *bdev, long block0,
+ struct page *page, int is_readahead, int syncio, int free_group);
+int get_main_pool_phys_params(void);
+
+void toi_close_bdev(struct block_device *bdev);
+struct block_device *toi_open_bdev(char *uuid, dev_t default_device,
+ int display_errs);
+
+extern struct toi_module_ops toi_blockwriter_ops;
+void dump_block_chains(void);
+void debug_broken_header(void);
+extern unsigned long raw_pages_allocd, header_pages_reserved;
+int toi_bio_chains_debug_info(char *buffer, int size);
+void toi_bio_chains_post_atomic(struct toi_boot_kernel_data *bkd);
+int toi_bio_scan_for_image(int quiet);
+int toi_bio_get_header_version(void);
+
+void close_resume_dev_t(int force);
+int open_resume_dev_t(int force, int quiet);
+
+struct toi_incremental_image_pointer_saved_data {
+ unsigned long block;
+ int chain;
+};
+
+struct toi_incremental_image_pointer {
+ struct toi_incremental_image_pointer_saved_data save;
+ struct block_device *bdev;
+ unsigned long block;
+};
+
+void toi_bio_store_inc_image_ptr(struct toi_incremental_image_pointer *ptr);
+void toi_bio_restore_inc_image_ptr(struct toi_incremental_image_pointer *ptr);
diff --git a/kernel/power/tuxonice_bio_signature.c b/kernel/power/tuxonice_bio_signature.c
new file mode 100644
index 000000000..ead874f8e
--- /dev/null
+++ b/kernel/power/tuxonice_bio_signature.c
@@ -0,0 +1,403 @@
+/*
+ * kernel/power/tuxonice_bio_signature.c
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ */
+
+#include <linux/fs_uuid.h>
+
+#include "tuxonice.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice_bio.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_io.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_bio_internal.h"
+
+struct sig_data *toi_sig_data;
+
+/* Structures describing the possible contents of the swap header page */
+
+struct old_sig_data {
+ dev_t device;
+ unsigned long sector;
+ int resume_attempted;
+ int orig_sig_type;
+};
+
+union diskpage {
+ union swap_header swh; /* swh.magic is the only member used */
+ struct sig_data sig_data;
+ struct old_sig_data old_sig_data;
+};
+
+union p_diskpage {
+ union diskpage *pointer;
+ char *ptr;
+ unsigned long address;
+};
+
+char *toi_cur_sig_page;
+char *toi_orig_sig_page;
+int have_image;
+int have_old_image;
+
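+/*
+ * Read the first block of the resume device into toi_cur_sig_page
+ * (allocating the page on first use) so the signature can be examined
+ * and updated.
+ */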
+int get_signature_page(void)
+{
+ if (!toi_cur_sig_page) {
+ toi_message(TOI_IO, TOI_VERBOSE, 0,
+ "Allocating current signature page.");
+ toi_cur_sig_page = (char *) toi_get_zeroed_page(38,
+ TOI_ATOMIC_GFP);
+ if (!toi_cur_sig_page) {
+ printk(KERN_ERR "Failed to allocate memory for the "
+ "current image signature.\n");
+ return -ENOMEM;
+ }
+
+ toi_sig_data = (struct sig_data *) toi_cur_sig_page;
+ }
+
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Reading signature from dev %lx,"
+ " sector %d.",
+ resume_block_device->bd_dev, resume_firstblock);
+
+ return toi_bio_ops.bdev_page_io(READ, resume_block_device,
+ resume_firstblock, virt_to_page(toi_cur_sig_page));
+}
+
+void forget_signature_page(void)
+{
+ if (toi_cur_sig_page) {
+ toi_sig_data = NULL;
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Freeing toi_cur_sig_page"
+ " (%p).", toi_cur_sig_page);
+ toi_free_page(38, (unsigned long) toi_cur_sig_page);
+ toi_cur_sig_page = NULL;
+ }
+
+ if (toi_orig_sig_page) {
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Freeing toi_orig_sig_page"
+ " (%p).", toi_orig_sig_page);
+ toi_free_page(38, (unsigned long) toi_orig_sig_page);
+ toi_orig_sig_page = NULL;
+ }
+}
+
+/*
+ * We need to ensure we use the signature page that's currently on disk,
+ * so as not to remove the image header. Post-atomic-restore, the orig sig
+ * page will be empty, so we can use that as our method of knowing that we
+ * need to load the on-disk signature and not use the non-image sig in
+ * memory. (We're going to power down after writing the change, so it's safe.)
+ */
+int toi_bio_mark_resume_attempted(int flag)
+{
+	toi_message(TOI_IO, TOI_VERBOSE, 0, "Mark resume attempted = %d.",
+ flag);
+ if (!toi_orig_sig_page) {
+ forget_signature_page();
+ get_signature_page();
+ }
+ toi_sig_data->resumed_before = flag;
+ return toi_bio_ops.bdev_page_io(WRITE, resume_block_device,
+ resume_firstblock, virt_to_page(toi_cur_sig_page));
+}
+
+int toi_bio_mark_have_image(void)
+{
+ int result = 0;
+ char buf[32];
+ struct fs_info *fs_info;
+
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Recording that an image exists.");
+ memcpy(toi_sig_data->sig, tuxonice_signature,
+ sizeof(tuxonice_signature));
+ toi_sig_data->have_image = 1;
+ toi_sig_data->resumed_before = 0;
+ toi_sig_data->header_dev_t = get_header_dev_t();
+ toi_sig_data->have_uuid = 0;
+
+ fs_info = fs_info_from_block_dev(get_header_bdev());
+ if (fs_info && !IS_ERR(fs_info)) {
+ memcpy(toi_sig_data->header_uuid, &fs_info->uuid, 16);
+ free_fs_info(fs_info);
+ } else
+ result = (int) PTR_ERR(fs_info);
+
+ if (!result) {
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Got uuid for dev_t %s.",
+ format_dev_t(buf, get_header_dev_t()));
+ toi_sig_data->have_uuid = 1;
+ } else
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Could not get uuid for "
+ "dev_t %s.",
+ format_dev_t(buf, get_header_dev_t()));
+
+ toi_sig_data->first_header_block = get_headerblock();
+ have_image = 1;
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "header dev_t is %x. First block "
+ "is %d.", toi_sig_data->header_dev_t,
+ toi_sig_data->first_header_block);
+
+ memcpy(toi_sig_data->sig2, tuxonice_signature,
+ sizeof(tuxonice_signature));
+ toi_sig_data->header_version = TOI_HEADER_VERSION;
+
+ return toi_bio_ops.bdev_page_io(WRITE, resume_block_device,
+ resume_firstblock, virt_to_page(toi_cur_sig_page));
+}
+
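+/*
+ * remove_old_signature - restore the swap signature overwritten by an
+ * old-style TuxOnIce image, using the device and sector recorded in the
+ * old_sig_data area of the signature page.
+ */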
+int remove_old_signature(void)
+{
+ union p_diskpage swap_header_page = (union p_diskpage) toi_cur_sig_page;
+ char *orig_sig;
+ char *header_start = (char *) toi_get_zeroed_page(38, TOI_ATOMIC_GFP);
+ int result;
+ struct block_device *header_bdev;
+ struct old_sig_data *old_sig_data =
+ &swap_header_page.pointer->old_sig_data;
+
+ header_bdev = toi_open_bdev(NULL, old_sig_data->device, 1);
+ result = toi_bio_ops.bdev_page_io(READ, header_bdev,
+ old_sig_data->sector, virt_to_page(header_start));
+
+ if (result)
+ goto out;
+
+ /*
+ * TODO: Get the original contents of the first bytes of the swap
+ * header page.
+ */
+ if (!old_sig_data->orig_sig_type)
+ orig_sig = "SWAP-SPACE";
+ else
+ orig_sig = "SWAPSPACE2";
+
+ memcpy(swap_header_page.pointer->swh.magic.magic, orig_sig, 10);
+ memcpy(swap_header_page.ptr, header_start, 10);
+
+ result = toi_bio_ops.bdev_page_io(WRITE, resume_block_device,
+ resume_firstblock, virt_to_page(swap_header_page.ptr));
+
+out:
+ toi_close_bdev(header_bdev);
+ have_old_image = 0;
+ toi_free_page(38, (unsigned long) header_start);
+ return result;
+}
+
+/*
+ * toi_bio_restore_original_signature - restore the original signature
+ *
+ * At boot time (aborting pre atomic-restore), toi_orig_sig_page gets used.
+ * It will have the original signature page contents, stored in the image
+ * header. Post atomic-restore, we use toi_cur_sig_page, which will contain
+ * the contents that were loaded when we started the cycle.
+ */
+int toi_bio_restore_original_signature(void)
+{
+ char *use = toi_orig_sig_page ? toi_orig_sig_page : toi_cur_sig_page;
+
+ if (have_old_image)
+ return remove_old_signature();
+
+ if (!use) {
+ printk("toi_bio_restore_original_signature: No signature "
+ "page loaded.\n");
+ return 0;
+ }
+
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Recording that no image exists.");
+ have_image = 0;
+ toi_sig_data->have_image = 0;
+ return toi_bio_ops.bdev_page_io(WRITE, resume_block_device,
+ resume_firstblock, virt_to_page(use));
+}
+
+/*
+ * toi_check_for_signature - See whether we have an image.
+ *
+ * Returns 0 if no image, 1 if there is a TuxOnIce image, 2 if a swsusp or
+ * uswsusp image was found, 3 if an old TuxOnIce implementation's signature
+ * was found, -1 if indeterminate, or an error code if the signature page
+ * could not be read.
+ */
+int toi_check_for_signature(void)
+{
+ union p_diskpage swap_header_page;
+ int type;
+ const char *normal_sigs[] = {"SWAP-SPACE", "SWAPSPACE2" };
+ const char *swsusp_sigs[] = {"S1SUSP", "S2SUSP", "S1SUSPEND" };
+ char *swap_header;
+
+ if (!toi_cur_sig_page) {
+ int result = get_signature_page();
+
+ if (result)
+ return result;
+ }
+
+ /*
+ * Start by looking for the binary header.
+ */
+ if (!memcmp(tuxonice_signature, toi_cur_sig_page,
+ sizeof(tuxonice_signature))) {
+ have_image = toi_sig_data->have_image;
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Have binary signature. "
+ "Have image is %d.", have_image);
+ if (have_image)
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "header dev_t is "
+ "%x. First block is %d.",
+ toi_sig_data->header_dev_t,
+ toi_sig_data->first_header_block);
+ return toi_sig_data->have_image;
+ }
+
+ /*
+ * Failing that, try old file allocator headers.
+ */
+
+ if (!memcmp(HaveImage, toi_cur_sig_page, strlen(HaveImage))) {
+ have_image = 1;
+ return 1;
+ }
+
+ have_image = 0;
+
+ if (!memcmp(NoImage, toi_cur_sig_page, strlen(NoImage)))
+ return 0;
+
+ /*
+ * Nope? How about swap?
+ */
+ swap_header_page = (union p_diskpage) toi_cur_sig_page;
+ swap_header = swap_header_page.pointer->swh.magic.magic;
+
+ /* Normal swapspace? */
+ for (type = 0; type < 2; type++)
+ if (!memcmp(normal_sigs[type], swap_header,
+ strlen(normal_sigs[type])))
+ return 0;
+
+ /* Swsusp or uswsusp? */
+ for (type = 0; type < 3; type++)
+ if (!memcmp(swsusp_sigs[type], swap_header,
+ strlen(swsusp_sigs[type])))
+ return 2;
+
+ /* Old TuxOnIce version? */
+ if (!memcmp(tuxonice_signature, swap_header,
+ sizeof(tuxonice_signature) - 1)) {
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Found old TuxOnIce "
+ "signature.");
+ have_old_image = 1;
+ return 3;
+ }
+
+ return -1;
+}
+
+/*
+ * toi_bio_image_exists
+ *
+ * Returns -1 if we don't know, otherwise the result of
+ * toi_check_for_signature(): 0 (no image), 1 (TuxOnIce image),
+ * 2 (swsusp/uswsusp image) or 3 (an old implementation's signature).
+ */
+int toi_bio_image_exists(int quiet)
+{
+ int result;
+ char *msg = NULL;
+
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_bio_image_exists.");
+
+ if (!resume_dev_t) {
+ if (!quiet)
+ printk(KERN_INFO "Not even trying to read header "
+ "because resume_dev_t is not set.\n");
+ return -1;
+ }
+
+ if (open_resume_dev_t(0, quiet))
+ return -1;
+
+ result = toi_check_for_signature();
+
+ clear_toi_state(TOI_RESUMED_BEFORE);
+ if (toi_sig_data->resumed_before)
+ set_toi_state(TOI_RESUMED_BEFORE);
+
+ if (quiet || result == -ENOMEM)
+ return result;
+
+ if (result == -1)
+ msg = "TuxOnIce: Unable to find a signature."
+ " Could you have moved a swap file?\n";
+ else if (!result)
+ msg = "TuxOnIce: No image found.\n";
+ else if (result == 1)
+ msg = "TuxOnIce: Image found.\n";
+ else if (result == 2)
+ msg = "TuxOnIce: uswsusp or swsusp image found.\n";
+ else if (result == 3)
+ msg = "TuxOnIce: Old implementation's signature found.\n";
+
+ printk(KERN_INFO "%s", msg);
+
+ return result;
+}
+
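+/*
+ * Scan the registered swap block devices for a TuxOnIce signature. On
+ * success the matching major:minor is copied into resume_file and 1 is
+ * returned; otherwise resume_file is set to the first device tried (if any)
+ * and 0 is returned.
+ */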
+int toi_bio_scan_for_image(int quiet)
+{
+ struct block_device *bdev;
+ char default_name[255] = "";
+
+ if (!quiet)
+ printk(KERN_DEBUG "Scanning swap devices for TuxOnIce "
+ "signature...\n");
+ for (bdev = next_bdev_of_type(NULL, "swap"); bdev;
+ bdev = next_bdev_of_type(bdev, "swap")) {
+ int result;
+ char name[255] = "";
+ sprintf(name, "%u:%u", MAJOR(bdev->bd_dev),
+ MINOR(bdev->bd_dev));
+ if (!quiet)
+ printk(KERN_DEBUG "- Trying %s.\n", name);
+ resume_block_device = bdev;
+ resume_dev_t = bdev->bd_dev;
+
+ result = toi_check_for_signature();
+
+ resume_block_device = NULL;
+ resume_dev_t = MKDEV(0, 0);
+
+ if (!default_name[0])
+ strcpy(default_name, name);
+
+ if (result == 1) {
+ /* Got one! */
+ strcpy(resume_file, name);
+ next_bdev_of_type(bdev, NULL);
+ if (!quiet)
+ printk(KERN_DEBUG " ==> Image found on %s.\n",
+ resume_file);
+ return 1;
+ }
+ forget_signature_page();
+ }
+
+ if (!quiet)
+ printk(KERN_DEBUG "TuxOnIce scan: No image found.\n");
+ strcpy(resume_file, default_name);
+ return 0;
+}
+
+int toi_bio_get_header_version(void)
+{
+ return (memcmp(toi_sig_data->sig2, tuxonice_signature,
+ sizeof(tuxonice_signature))) ?
+ 0 : toi_sig_data->header_version;
+
+}
diff --git a/kernel/power/tuxonice_builtin.c b/kernel/power/tuxonice_builtin.c
new file mode 100644
index 000000000..0a6733ae0
--- /dev/null
+++ b/kernel/power/tuxonice_builtin.c
@@ -0,0 +1,498 @@
+/*
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ */
+#include <linux/kernel.h>
+#include <linux/swap.h>
+#include <linux/syscalls.h>
+#include <linux/bio.h>
+#include <linux/root_dev.h>
+#include <linux/freezer.h>
+#include <linux/reboot.h>
+#include <linux/writeback.h>
+#include <linux/tty.h>
+#include <linux/crypto.h>
+#include <linux/cpu.h>
+#include <linux/ctype.h>
+#include <linux/kthread.h>
+#include "tuxonice_io.h"
+#include "tuxonice.h"
+#include "tuxonice_extent.h"
+#include "tuxonice_netlink.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_pagedir.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_power_off.h"
+#include "tuxonice_alloc.h"
+
+unsigned long toi_bootflags_mask;
+
+/*
+ * Highmem related functions (x86 only).
+ */
+
+#ifdef CONFIG_HIGHMEM
+
+/**
+ * copyback_high: Restore highmem pages.
+ *
+ * Highmem data and pbe lists are/can be stored in highmem.
+ * The format is slightly different to the lowmem pbe lists
+ * used for the assembly code: the last pbe in each page is
+ * a struct page * instead of struct pbe *, pointing to the
+ * next page where pbes are stored (or NULL if it happens to be
+ * the end of the list). Since we don't want to generate
+ * unnecessary deltas against swsusp code, we use a cast
+ * instead of a union.
+ **/
+
+static void copyback_high(void)
+{
+ struct page *pbe_page = (struct page *) restore_highmem_pblist;
+ struct pbe *this_pbe, *first_pbe;
+ unsigned long *origpage, *copypage;
+ int pbe_index = 1;
+
+ if (!pbe_page)
+ return;
+
+ this_pbe = (struct pbe *) kmap_atomic(pbe_page);
+ first_pbe = this_pbe;
+
+ while (this_pbe) {
+ int loop = (PAGE_SIZE / sizeof(unsigned long)) - 1;
+
+ origpage = kmap_atomic(pfn_to_page((unsigned long) this_pbe->orig_address));
+ copypage = kmap_atomic((struct page *) this_pbe->address);
+
+ while (loop >= 0) {
+ *(origpage + loop) = *(copypage + loop);
+ loop--;
+ }
+
+ kunmap_atomic(origpage);
+ kunmap_atomic(copypage);
+
+ if (!this_pbe->next)
+ break;
+
+ if (pbe_index < PBES_PER_PAGE) {
+ this_pbe++;
+ pbe_index++;
+ } else {
+ pbe_page = (struct page *) this_pbe->next;
+ kunmap_atomic(first_pbe);
+ if (!pbe_page)
+ return;
+ this_pbe = (struct pbe *) kmap_atomic(pbe_page);
+ first_pbe = this_pbe;
+ pbe_index = 1;
+ }
+ }
+ kunmap_atomic(first_pbe);
+}
+
+#else /* CONFIG_HIGHMEM */
+static void copyback_high(void) { }
+#endif
+
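+/*
+ * Wait up to @timeout seconds for a keypress on /dev/console, returning the
+ * (lowercased) key pressed, or '\0' on timeout or error.
+ */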
+char toi_wait_for_keypress_dev_console(int timeout)
+{
+ int fd, this_timeout = 255, orig_kthread = 0;
+ char key = '\0';
+ struct termios t, t_backup;
+
+ /* We should be guaranteed /dev/console exists after populate_rootfs()
+ * in init/main.c.
+ */
+ fd = sys_open("/dev/console", O_RDONLY, 0);
+ if (fd < 0) {
+ printk(KERN_INFO "Couldn't open /dev/console.\n");
+ return key;
+ }
+
+ if (sys_ioctl(fd, TCGETS, (long)&t) < 0)
+ goto out_close;
+
+ memcpy(&t_backup, &t, sizeof(t));
+
+ t.c_lflag &= ~(ISIG|ICANON|ECHO);
+ t.c_cc[VMIN] = 0;
+
+new_timeout:
+ if (timeout > 0) {
+ this_timeout = timeout < 26 ? timeout : 25;
+ timeout -= this_timeout;
+ this_timeout *= 10;
+ }
+
+ t.c_cc[VTIME] = this_timeout;
+
+ if (sys_ioctl(fd, TCSETS, (long)&t) < 0)
+ goto out_restore;
+
+ if (current->flags & PF_KTHREAD) {
+ orig_kthread = (current->flags & PF_KTHREAD);
+ current->flags &= ~PF_KTHREAD;
+ }
+
+ while (1) {
+ if (sys_read(fd, &key, 1) <= 0) {
+ if (timeout)
+ goto new_timeout;
+ key = '\0';
+ break;
+ }
+ key = tolower(key);
+ if (test_toi_state(TOI_SANITY_CHECK_PROMPT)) {
+ if (key == 'c') {
+ set_toi_state(TOI_CONTINUE_REQ);
+ break;
+ } else if (key == ' ')
+ break;
+ } else
+ break;
+ }
+ if (orig_kthread) {
+ current->flags |= PF_KTHREAD;
+ }
+
+out_restore:
+ sys_ioctl(fd, TCSETS, (long)&t_backup);
+out_close:
+ sys_close(fd);
+
+ return key;
+}
+
+struct toi_boot_kernel_data toi_bkd __nosavedata
+ __attribute__((aligned(PAGE_SIZE))) = {
+ MY_BOOT_KERNEL_DATA_VERSION,
+ 0,
+#ifdef CONFIG_TOI_REPLACE_SWSUSP
+ (1 << TOI_REPLACE_SWSUSP) |
+#endif
+ (1 << TOI_NO_FLUSHER_THREAD) |
+ (1 << TOI_PAGESET2_FULL),
+};
+
+struct block_device *toi_open_by_devnum(dev_t dev)
+{
+ struct block_device *bdev = bdget(dev);
+ int err = -ENOMEM;
+ if (bdev)
+ err = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY, NULL);
+ return err ? ERR_PTR(err) : bdev;
+}
+
+/**
+ * toi_close_bdev: Close a swap bdev.
+ *
+ * int: The swap entry number to close.
+ */
+void toi_close_bdev(struct block_device *bdev)
+{
+ blkdev_put(bdev, FMODE_READ | FMODE_NDELAY);
+}
+
+int toi_wait = CONFIG_TOI_DEFAULT_WAIT;
+struct toi_core_fns *toi_core_fns;
+unsigned long toi_result;
+struct pagedir pagedir1 = {1};
+struct toi_cbw **toi_first_cbw;
+int toi_next_cbw;
+
+unsigned long toi_get_nonconflicting_page(void)
+{
+ return toi_core_fns->get_nonconflicting_page();
+}
+
+int toi_post_context_save(void)
+{
+ return toi_core_fns->post_context_save();
+}
+
+int try_tuxonice_hibernate(void)
+{
+ if (!toi_core_fns)
+ return -ENODEV;
+
+ return toi_core_fns->try_hibernate();
+}
+
+static int num_resume_calls;
+#ifdef CONFIG_TOI_IGNORE_LATE_INITCALL
+static int ignore_late_initcall = 1;
+#else
+static int ignore_late_initcall;
+#endif
+
+int toi_translate_err_default = TOI_CONTINUE_REQ;
+
+void try_tuxonice_resume(void)
+{
+ if (!hibernation_available())
+ return;
+
+ /* Don't let it wrap around eventually */
+ if (num_resume_calls < 2)
+ num_resume_calls++;
+
+ if (num_resume_calls == 1 && ignore_late_initcall) {
+ printk(KERN_INFO "TuxOnIce: Ignoring late initcall, as requested.\n");
+ return;
+ }
+
+ if (toi_core_fns)
+ toi_core_fns->try_resume();
+ else
+ printk(KERN_INFO "TuxOnIce core not loaded yet.\n");
+}
+
+int toi_lowlevel_builtin(void)
+{
+ int error = 0;
+
+ save_processor_state();
+ error = swsusp_arch_suspend();
+ if (error)
+ printk(KERN_ERR "Error %d hibernating\n", error);
+
+ /* Restore control flow appears here */
+ if (!toi_in_hibernate) {
+ copyback_high();
+ set_toi_state(TOI_NOW_RESUMING);
+ }
+
+ restore_processor_state();
+ return error;
+}
+
+unsigned long toi_compress_bytes_in;
+unsigned long toi_compress_bytes_out;
+
+int toi_in_suspend(void)
+{
+ return in_suspend;
+}
+
+unsigned long toi_state = ((1 << TOI_BOOT_TIME) |
+ (1 << TOI_IGNORE_LOGLEVEL) |
+ (1 << TOI_IO_STOPPED));
+
+/* The number of hibernates we have started (some may have been cancelled) */
+unsigned int nr_hibernates;
+int toi_running;
+__nosavedata int toi_in_hibernate;
+__nosavedata struct pbe *restore_highmem_pblist;
+
+int toi_trace_allocs;
+
+void toi_read_lock_tasklist(void)
+{
+ read_lock(&tasklist_lock);
+}
+
+void toi_read_unlock_tasklist(void)
+{
+ read_unlock(&tasklist_lock);
+}
+
+#ifdef CONFIG_TOI_ZRAM_SUPPORT
+int (*toi_flag_zram_disks) (void);
+
+int toi_do_flag_zram_disks(void)
+{
+ return toi_flag_zram_disks ? (*toi_flag_zram_disks)() : 0;
+}
+
+#endif
+
+/* toi_generate_free_page_map
+ *
+ * Description: This routine generates a bitmap of free pages from the
+ * lists used by the memory manager. We then use the bitmap
+ * to quickly calculate which pages to save and in which
+ * pagesets.
+ */
+void toi_generate_free_page_map(void)
+{
+ int order, cpu, t;
+ unsigned long flags, i;
+ struct zone *zone;
+ struct list_head *curr;
+ unsigned long pfn;
+ struct page *page;
+
+ for_each_populated_zone(zone) {
+
+ if (!zone->spanned_pages)
+ continue;
+
+ spin_lock_irqsave(&zone->lock, flags);
+
+ for (i = 0; i < zone->spanned_pages; i++) {
+ pfn = zone->zone_start_pfn + i;
+
+ if (!pfn_valid(pfn))
+ continue;
+
+ page = pfn_to_page(pfn);
+
+ ClearPageNosaveFree(page);
+ }
+
+ for_each_migratetype_order(order, t) {
+ list_for_each(curr,
+ &zone->free_area[order].free_list[t]) {
+ unsigned long j;
+
+ pfn = page_to_pfn(list_entry(curr, struct page,
+ lru));
+ for (j = 0; j < (1UL << order); j++)
+ SetPageNosaveFree(pfn_to_page(pfn + j));
+ }
+ }
+
+ for_each_online_cpu(cpu) {
+ struct per_cpu_pageset *pset =
+ per_cpu_ptr(zone->pageset, cpu);
+ struct per_cpu_pages *pcp = &pset->pcp;
+ struct page *page;
+ int t;
+
+ for (t = 0; t < MIGRATE_PCPTYPES; t++)
+ list_for_each_entry(page, &pcp->lists[t], lru)
+ SetPageNosaveFree(page);
+ }
+
+ spin_unlock_irqrestore(&zone->lock, flags);
+ }
+}
+
+/* toi_size_of_free_region
+ *
+ * Description: Return the number of pages that are free, beginning with and
+ * including this one.
+ */
+int toi_size_of_free_region(struct zone *zone, unsigned long start_pfn)
+{
+ unsigned long this_pfn = start_pfn,
+ end_pfn = zone_end_pfn(zone);
+
+ while (pfn_valid(this_pfn) && this_pfn < end_pfn && PageNosaveFree(pfn_to_page(this_pfn)))
+ this_pfn++;
+
+ return this_pfn - start_pfn;
+}
+
+static int __init toi_wait_setup(char *str)
+{
+ int value;
+
+ if (sscanf(str, "=%d", &value)) {
+ if (value < -1 || value > 255)
+ printk(KERN_INFO "TuxOnIce_wait outside range -1 to "
+ "255.\n");
+ else
+ toi_wait = value;
+ }
+
+ return 1;
+}
+__setup("toi_wait", toi_wait_setup);
+
+static int __init toi_translate_retry_setup(char *str)
+{
+ toi_translate_err_default = 0;
+ return 1;
+}
+__setup("toi_translate_retry", toi_translate_retry_setup);
+
+static int __init toi_debug_setup(char *str)
+{
+ toi_bkd.toi_action |= (1 << TOI_LOGALL);
+ toi_bootflags_mask |= (1 << TOI_LOGALL);
+ toi_bkd.toi_debug_state = 255;
+ toi_bkd.toi_default_console_level = 7;
+ return 1;
+}
+__setup("toi_debug_setup", toi_debug_setup);
+
+static int __init toi_pause_setup(char *str)
+{
+ toi_bkd.toi_action |= (1 << TOI_PAUSE);
+ toi_bootflags_mask |= (1 << TOI_PAUSE);
+ return 1;
+}
+__setup("toi_pause", toi_pause_setup);
+
+#ifdef CONFIG_PM_DEBUG
+static int __init toi_trace_allocs_setup(char *str)
+{
+ int value;
+
+ if (sscanf(str, "=%d", &value))
+ toi_trace_allocs = value;
+
+ return 1;
+}
+__setup("toi_trace_allocs", toi_trace_allocs_setup);
+#endif
+
+static int __init toi_ignore_late_initcall_setup(char *str)
+{
+ int value;
+
+ if (sscanf(str, "=%d", &value))
+ ignore_late_initcall = value;
+
+ return 1;
+}
+__setup("toi_initramfs_resume_only", toi_ignore_late_initcall_setup);
+
+static int __init toi_force_no_multithreaded_setup(char *str)
+{
+ int value;
+
+ toi_bkd.toi_action &= ~(1 << TOI_NO_MULTITHREADED_IO);
+ toi_bootflags_mask |= (1 << TOI_NO_MULTITHREADED_IO);
+
+ if (sscanf(str, "=%d", &value) && value)
+ toi_bkd.toi_action |= (1 << TOI_NO_MULTITHREADED_IO);
+
+ return 1;
+}
+__setup("toi_no_multithreaded", toi_force_no_multithreaded_setup);
+
+#ifdef CONFIG_KGDB
+static int __init toi_post_resume_breakpoint_setup(char *str)
+{
+ int value;
+
+ toi_bkd.toi_action &= ~(1 << TOI_POST_RESUME_BREAKPOINT);
+ toi_bootflags_mask |= (1 << TOI_POST_RESUME_BREAKPOINT);
+ if (sscanf(str, "=%d", &value) && value)
+ toi_bkd.toi_action |= (1 << TOI_POST_RESUME_BREAKPOINT);
+
+ return 1;
+}
+__setup("toi_post_resume_break", toi_post_resume_breakpoint_setup);
+#endif
+
+static int __init toi_disable_readahead_setup(char *str)
+{
+ int value;
+
+ toi_bkd.toi_action &= ~(1 << TOI_NO_READAHEAD);
+ toi_bootflags_mask |= (1 << TOI_NO_READAHEAD);
+ if (sscanf(str, "=%d", &value) && value)
+ toi_bkd.toi_action |= (1 << TOI_NO_READAHEAD);
+
+ return 1;
+}
+__setup("toi_no_readahead", toi_disable_readahead_setup);
diff --git a/kernel/power/tuxonice_builtin.h b/kernel/power/tuxonice_builtin.h
new file mode 100644
index 000000000..9539818e0
--- /dev/null
+++ b/kernel/power/tuxonice_builtin.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ */
+#include <asm/setup.h>
+
+extern struct toi_core_fns *toi_core_fns;
+extern unsigned long toi_compress_bytes_in, toi_compress_bytes_out;
+extern unsigned int nr_hibernates;
+extern int toi_in_hibernate;
+
+extern __nosavedata struct pbe *restore_highmem_pblist;
+
+int toi_lowlevel_builtin(void);
+
+#ifdef CONFIG_HIGHMEM
+extern __nosavedata struct zone_data *toi_nosave_zone_list;
+extern __nosavedata unsigned long toi_nosave_max_pfn;
+#endif
+
+extern unsigned long toi_get_nonconflicting_page(void);
+extern int toi_post_context_save(void);
+
+extern char toi_wait_for_keypress_dev_console(int timeout);
+extern struct block_device *toi_open_by_devnum(dev_t dev);
+extern void toi_close_bdev(struct block_device *bdev);
+extern int toi_wait;
+extern int toi_translate_err_default;
+extern int toi_force_no_multithreaded;
+extern void toi_read_lock_tasklist(void);
+extern void toi_read_unlock_tasklist(void);
+extern int toi_in_suspend(void);
+extern void toi_generate_free_page_map(void);
+extern int toi_size_of_free_region(struct zone *zone, unsigned long start_pfn);
+
+#ifdef CONFIG_TOI_ZRAM_SUPPORT
+extern int toi_do_flag_zram_disks(void);
+#else
+#define toi_do_flag_zram_disks() (0)
+#endif
diff --git a/kernel/power/tuxonice_checksum.c b/kernel/power/tuxonice_checksum.c
new file mode 100644
index 000000000..8952c0fec
--- /dev/null
+++ b/kernel/power/tuxonice_checksum.c
@@ -0,0 +1,392 @@
+/*
+ * kernel/power/tuxonice_checksum.c
+ *
+ * Copyright (C) 2006-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * This file contains data checksum routines for TuxOnIce,
+ * using cryptoapi. They are used to locate any modifications
+ * made to pageset 2 while we're saving it.
+ */
+
+#include <linux/suspend.h>
+#include <linux/highmem.h>
+#include <linux/vmalloc.h>
+#include <linux/crypto.h>
+#include <linux/scatterlist.h>
+
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_io.h"
+#include "tuxonice_pageflags.h"
+#include "tuxonice_checksum.h"
+#include "tuxonice_pagedir.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_ui.h"
+
+static struct toi_module_ops toi_checksum_ops;
+
+/* Constant at the moment, but tuning may be allowed later */
+static char toi_checksum_name[32] = "md4";
+/* Bytes per checksum */
+#define CHECKSUM_SIZE (16)
+
+#define CHECKSUMS_PER_PAGE ((PAGE_SIZE - sizeof(void *)) / CHECKSUM_SIZE)
+
+struct cpu_context {
+ struct crypto_hash *transform;
+ struct hash_desc desc;
+ struct scatterlist sg[2];
+ char *buf;
+};
+
+static DEFINE_PER_CPU(struct cpu_context, contexts);
+static int pages_allocated;
+static unsigned long page_list;
+
+static int toi_num_resaved;
+
+static unsigned long this_checksum, next_page;
+static int checksum_count;
+
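+/*
+ * Checksums are stored in a chain of pages: the first sizeof(void *) bytes
+ * of each page link to the next page, and the rest holds CHECKSUMS_PER_PAGE
+ * checksums of CHECKSUM_SIZE bytes each (for example, 255 16-byte digests
+ * per 4096-byte page).
+ */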
+static inline int checksum_pages_needed(void)
+{
+ return DIV_ROUND_UP(pagedir2.size, CHECKSUMS_PER_PAGE);
+}
+
+/* ---- Local buffer management ---- */
+
+/*
+ * toi_checksum_cleanup
+ *
+ * Frees memory allocated for our labours.
+ */
+static void toi_checksum_cleanup(int ending_cycle)
+{
+ int cpu;
+
+ if (ending_cycle) {
+ for_each_online_cpu(cpu) {
+ struct cpu_context *this = &per_cpu(contexts, cpu);
+ if (this->transform) {
+ crypto_free_hash(this->transform);
+ this->transform = NULL;
+ this->desc.tfm = NULL;
+ }
+
+ if (this->buf) {
+ toi_free_page(27, (unsigned long) this->buf);
+ this->buf = NULL;
+ }
+ }
+ }
+}
+
+/*
+ * toi_checksum_initialise
+ *
+ * Prepare to do some work by allocating buffers and transforms.
+ * Returns: Zero on success (or when checksumming is not being started),
+ * one if the checksum transform or per-cpu buffer could not be set up.
+ */
+static int toi_checksum_initialise(int starting_cycle)
+{
+ int cpu;
+
+ if (!(starting_cycle & SYSFS_HIBERNATE) || !toi_checksum_ops.enabled)
+ return 0;
+
+ if (!*toi_checksum_name) {
+ printk(KERN_INFO "TuxOnIce: No checksum algorithm name set.\n");
+ return 1;
+ }
+
+ for_each_online_cpu(cpu) {
+ struct cpu_context *this = &per_cpu(contexts, cpu);
+ struct page *page;
+
+ this->transform = crypto_alloc_hash(toi_checksum_name, 0, 0);
+ if (IS_ERR(this->transform)) {
+ printk(KERN_INFO "TuxOnIce: Failed to initialise the "
+ "%s checksum algorithm: %ld.\n",
+ toi_checksum_name, (long) this->transform);
+ this->transform = NULL;
+ return 1;
+ }
+
+ this->desc.tfm = this->transform;
+ this->desc.flags = 0;
+
+ page = toi_alloc_page(27, GFP_KERNEL);
+ if (!page)
+ return 1;
+ this->buf = page_address(page);
+ sg_init_one(&this->sg[0], this->buf, PAGE_SIZE);
+ }
+ return 0;
+}
+
+/*
+ * toi_checksum_print_debug_stats
+ * @buffer: Pointer to a buffer into which the debug info will be printed.
+ * @size: Size of the buffer.
+ *
+ * Print information to be recorded for debugging purposes into a buffer.
+ * Returns: Number of characters written to the buffer.
+ */
+
+static int toi_checksum_print_debug_stats(char *buffer, int size)
+{
+ int len;
+
+ if (!toi_checksum_ops.enabled)
+ return scnprintf(buffer, size,
+ "- Checksumming disabled.\n");
+
+ len = scnprintf(buffer, size, "- Checksum method is '%s'.\n",
+ toi_checksum_name);
+ len += scnprintf(buffer + len, size - len,
+ " %d pages resaved in atomic copy.\n", toi_num_resaved);
+ return len;
+}
+
+static int toi_checksum_memory_needed(void)
+{
+ return toi_checksum_ops.enabled ?
+ checksum_pages_needed() << PAGE_SHIFT : 0;
+}
+
+static int toi_checksum_storage_needed(void)
+{
+ if (toi_checksum_ops.enabled)
+ return strlen(toi_checksum_name) + sizeof(int) + 1;
+ else
+ return 0;
+}
+
+/*
+ * toi_checksum_save_config_info
+ * @buffer: Pointer to a buffer of size PAGE_SIZE.
+ *
+ * Save information needed when reloading the image at resume time.
+ * Returns: Number of bytes used for saving our data.
+ */
+static int toi_checksum_save_config_info(char *buffer)
+{
+ int namelen = strlen(toi_checksum_name) + 1;
+ int total_len;
+
+ *((unsigned int *) buffer) = namelen;
+ strncpy(buffer + sizeof(unsigned int), toi_checksum_name, namelen);
+ total_len = sizeof(unsigned int) + namelen;
+ return total_len;
+}
+
+/* toi_checksum_load_config_info
+ * @buffer: Pointer to the start of the data.
+ * @size: Number of bytes that were saved.
+ *
+ * Description: Reload information needed for dechecksumming the image at
+ * resume time.
+ */
+static void toi_checksum_load_config_info(char *buffer, int size)
+{
+ int namelen;
+
+ namelen = *((unsigned int *) (buffer));
+ strncpy(toi_checksum_name, buffer + sizeof(unsigned int),
+ namelen);
+ return;
+}
+
+/*
+ * Free Checksum Memory
+ */
+
+void free_checksum_pages(void)
+{
+ while (pages_allocated) {
+ unsigned long next = *((unsigned long *) page_list);
+ ClearPageNosave(virt_to_page(page_list));
+ toi_free_page(15, (unsigned long) page_list);
+ page_list = next;
+ pages_allocated--;
+ }
+}
+
+/*
+ * Allocate Checksum Memory
+ */
+
+int allocate_checksum_pages(void)
+{
+ int pages_needed = checksum_pages_needed();
+
+ if (!toi_checksum_ops.enabled)
+ return 0;
+
+ while (pages_allocated < pages_needed) {
+ unsigned long *new_page =
+ (unsigned long *) toi_get_zeroed_page(15, TOI_ATOMIC_GFP);
+ if (!new_page) {
+ printk(KERN_ERR "Unable to allocate checksum pages.\n");
+ return -ENOMEM;
+ }
+ SetPageNosave(virt_to_page(new_page));
+ (*new_page) = page_list;
+ page_list = (unsigned long) new_page;
+ pages_allocated++;
+ }
+
+ next_page = (unsigned long) page_list;
+ checksum_count = 0;
+
+ return 0;
+}
+
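+/*
+ * Return the address at which the next checksum should be stored, walking
+ * the chain of pages allocated above; NULL if checksumming is disabled.
+ */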
+char *tuxonice_get_next_checksum(void)
+{
+ if (!toi_checksum_ops.enabled)
+ return NULL;
+
+ if (checksum_count % CHECKSUMS_PER_PAGE)
+ this_checksum += CHECKSUM_SIZE;
+ else {
+ this_checksum = next_page + sizeof(void *);
+ next_page = *((unsigned long *) next_page);
+ }
+
+ checksum_count++;
+ return (char *) this_checksum;
+}
+
+int tuxonice_calc_checksum(struct page *page, char *checksum_locn)
+{
+ char *pa;
+ int result, cpu = smp_processor_id();
+ struct cpu_context *ctx = &per_cpu(contexts, cpu);
+
+ if (!toi_checksum_ops.enabled)
+ return 0;
+
+ pa = kmap(page);
+ memcpy(ctx->buf, pa, PAGE_SIZE);
+ kunmap(page);
+ result = crypto_hash_digest(&ctx->desc, ctx->sg, PAGE_SIZE,
+ checksum_locn);
+ if (result)
+ printk(KERN_ERR "TuxOnIce checksumming: crypto_hash_digest "
+ "returned %d.\n", result);
+ return result;
+}
+/*
+ * Calculate and verify checksums
+ */
+
+void check_checksums(void)
+{
+ int index = 0, cpu = smp_processor_id();
+ char current_checksum[CHECKSUM_SIZE];
+ struct cpu_context *ctx = &per_cpu(contexts, cpu);
+ unsigned long pfn;
+
+ if (!toi_checksum_ops.enabled) {
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Checksumming disabled.");
+ return;
+ }
+
+ next_page = (unsigned long) page_list;
+
+ toi_num_resaved = 0;
+ this_checksum = 0;
+
+ toi_trace_index++;
+
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Verifying checksums.");
+ memory_bm_position_reset(pageset2_map);
+ for (pfn = memory_bm_next_pfn(pageset2_map, 0); pfn != BM_END_OF_MAP;
+ pfn = memory_bm_next_pfn(pageset2_map, 0)) {
+ int ret, resave_needed = false;
+ char *pa;
+ struct page *page = pfn_to_page(pfn);
+
+ if (index < checksum_count) {
+ if (index % CHECKSUMS_PER_PAGE) {
+ this_checksum += CHECKSUM_SIZE;
+ } else {
+ this_checksum = next_page + sizeof(void *);
+ next_page = *((unsigned long *) next_page);
+ }
+
+ /* Done when IRQs disabled so must be atomic */
+ pa = kmap_atomic(page);
+ memcpy(ctx->buf, pa, PAGE_SIZE);
+ kunmap_atomic(pa);
+ ret = crypto_hash_digest(&ctx->desc, ctx->sg, PAGE_SIZE,
+ current_checksum);
+
+ if (ret) {
+ printk(KERN_INFO "Digest failed. Returned %d.\n", ret);
+ return;
+ }
+
+ resave_needed = memcmp(current_checksum, (char *) this_checksum,
+ CHECKSUM_SIZE);
+ } else {
+ resave_needed = true;
+ }
+
+ if (resave_needed) {
+ TOI_TRACE_DEBUG(pfn, "_Resaving %d", resave_needed);
+ SetPageResave(pfn_to_page(pfn));
+ toi_num_resaved++;
+ if (test_action_state(TOI_ABORT_ON_RESAVE_NEEDED))
+ set_abort_result(TOI_RESAVE_NEEDED);
+ }
+
+ index++;
+ }
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Checksum verification complete.");
+}
+
+static struct toi_sysfs_data sysfs_params[] = {
+ SYSFS_INT("enabled", SYSFS_RW, &toi_checksum_ops.enabled, 0, 1, 0,
+ NULL),
+ SYSFS_BIT("abort_if_resave_needed", SYSFS_RW, &toi_bkd.toi_action,
+ TOI_ABORT_ON_RESAVE_NEEDED, 0)
+};
+
+/*
+ * Ops structure.
+ */
+static struct toi_module_ops toi_checksum_ops = {
+ .type = MISC_MODULE,
+ .name = "checksumming",
+ .directory = "checksum",
+ .module = THIS_MODULE,
+ .initialise = toi_checksum_initialise,
+ .cleanup = toi_checksum_cleanup,
+ .print_debug_info = toi_checksum_print_debug_stats,
+ .save_config_info = toi_checksum_save_config_info,
+ .load_config_info = toi_checksum_load_config_info,
+ .memory_needed = toi_checksum_memory_needed,
+ .storage_needed = toi_checksum_storage_needed,
+
+ .sysfs_data = sysfs_params,
+ .num_sysfs_entries = sizeof(sysfs_params) /
+ sizeof(struct toi_sysfs_data),
+};
+
+/* ---- Registration ---- */
+int toi_checksum_init(void)
+{
+ int result = toi_register_module(&toi_checksum_ops);
+ return result;
+}
+
+void toi_checksum_exit(void)
+{
+ toi_unregister_module(&toi_checksum_ops);
+}
diff --git a/kernel/power/tuxonice_checksum.h b/kernel/power/tuxonice_checksum.h
new file mode 100644
index 000000000..7d6478a6a
--- /dev/null
+++ b/kernel/power/tuxonice_checksum.h
@@ -0,0 +1,31 @@
+/*
+ * kernel/power/tuxonice_checksum.h
+ *
+ * Copyright (C) 2006-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * This file contains data checksum routines for TuxOnIce,
+ * using cryptoapi. They are used to locate any modifications
+ * made to pageset 2 while we're saving it.
+ */
+
+#if defined(CONFIG_TOI_CHECKSUM)
+extern int toi_checksum_init(void);
+extern void toi_checksum_exit(void);
+void check_checksums(void);
+int allocate_checksum_pages(void);
+void free_checksum_pages(void);
+char *tuxonice_get_next_checksum(void);
+int tuxonice_calc_checksum(struct page *page, char *checksum_locn);
+#else
+static inline int toi_checksum_init(void) { return 0; }
+static inline void toi_checksum_exit(void) { }
+static inline void check_checksums(void) { }
+static inline int allocate_checksum_pages(void) { return 0; }
+static inline void free_checksum_pages(void) { }
+static inline char *tuxonice_get_next_checksum(void) { return NULL; }
+static inline int tuxonice_calc_checksum(struct page *page, char *checksum_locn)
+ { return 0; }
+#endif
+
diff --git a/kernel/power/tuxonice_cluster.c b/kernel/power/tuxonice_cluster.c
new file mode 100644
index 000000000..cfe3383ab
--- /dev/null
+++ b/kernel/power/tuxonice_cluster.c
@@ -0,0 +1,1058 @@
+/*
+ * kernel/power/tuxonice_cluster.c
+ *
+ * Copyright (C) 2006-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * This file contains routines for cluster hibernation support.
+ *
+ * Based on ip autoconfiguration code in net/ipv4/ipconfig.c.
+ *
+ * How does it work?
+ *
+ * There is no 'master' node that tells everyone else what to do. All nodes
+ * send messages to the broadcast address/port, maintain a list of peers
+ * and figure out when to progress to the next step in hibernating or resuming.
+ * This makes us more fault tolerant when it comes to nodes coming and going
+ * (which may be more of an issue if we're hibernating when power supplies
+ * are being unreliable).
+ *
+ * At boot time, we start a ktuxonice thread that handles communication with
+ * other nodes. This node maintains a state machine that controls our progress
+ * through hibernating and resuming, keeping us in step with other nodes. Nodes
+ * are identified by their hw address.
+ *
+ * On startup, the node sends CLUSTER_PING on the configured interface's
+ * broadcast address, port $toi_cluster_port (see below) and begins to listen
+ * for other broadcast messages. CLUSTER_PING messages are repeated at
+ * intervals of 5 minutes, with a random offset to spread traffic out.
+ *
+ * A hibernation cycle is initiated from any node via
+ *
+ * echo > /sys/power/tuxonice/do_hibernate
+ *
+ * and (possibly) the hibernate script. At each step of the process, the node
+ * completes its work, and waits for all other nodes to signal completion of
+ * their work (or timeout) before progressing to the next step.
+ *
+ * Request/state Action before reply Possible reply Next state
+ * HIBERNATE capable, pre-script HIBERNATE|ACK NODE_PREP
+ * HIBERNATE|NACK INIT_0
+ *
+ * PREP prepare_image PREP|ACK IMAGE_WRITE
+ * PREP|NACK INIT_0
+ * ABORT RUNNING
+ *
+ * IO write image IO|ACK power off
+ * ABORT POST_RESUME
+ *
+ * (Boot time) check for image IMAGE|ACK RESUME_PREP
+ * (Note 1)
+ * IMAGE|NACK (Note 2)
+ *
+ * PREP prepare read image PREP|ACK IMAGE_READ
+ * PREP|NACK (As NACK_IMAGE)
+ *
+ * IO read image IO|ACK POST_RESUME
+ *
+ * POST_RESUME thaw, post-script RUNNING
+ *
+ * INIT_0 init 0
+ *
+ * Other messages:
+ *
+ * - PING: Request for all other live nodes to send a PONG. Used at startup to
+ * announce presence, when a node is suspected dead and periodically, in case
+ * segments of the network are [un]plugged.
+ *
+ * - PONG: Response to a PING.
+ *
+ * - ABORT: Request to cancel writing an image.
+ *
+ * - BYE: Notification that this node is shutting down.
+ *
+ * Note 1: Repeated at 3s intervals until we continue to boot/resume, so that
+ * nodes which are slower to start up can get state synchronised. If a node
+ * starting up sees other nodes sending RESUME_PREP or IMAGE_READ, it may send
+ * ACK_IMAGE and they will wait for it to catch up. If it sees ACK_READ, it
+ * must invalidate its image (if any) and boot normally.
+ *
+ * Note 2: May occur when one node lost power or powered off while others
+ * hibernated. This node waits for others to complete resuming (ACK_READ)
+ * before completing its boot, so that it appears as a failed node restarting.
+ *
+ * If any node has an image, then it also has a list of nodes that hibernated
+ * in synchronisation with it. The node will wait for other nodes to appear
+ * or timeout before beginning its restoration.
+ *
+ * If a node has no image, it needs to wait, in case other nodes which do have
+ * an image are going to resume, but are taking longer to announce their
+ * presence. For this reason, the user can specify a timeout value and a number
+ * of nodes detected before we just continue. (We might want to assume in a
+ * cluster of, say, 15 nodes, if 8 others have booted without finding an image,
+ * the remaining nodes will too. This might help in situations where some nodes
+ * are much slower to boot, or more subject to hardware failures or such like).
+ */
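+
+/*
+ * Illustrative example (not part of the original description; message
+ * values taken from the enum defined further down): a successful two-node
+ * hibernation handshake might look roughly like this on the wire:
+ *
+ * node A -> broadcast MSG_HIBERNATE (32) "can we hibernate?"
+ * node B -> broadcast MSG_HIBERNATE | MSG_ACK (33) "yes, preparing too"
+ * node A -> broadcast MSG_IO (128) "writing my image"
+ * node B -> broadcast MSG_IO | MSG_ACK (129) "writing mine as well"
+ * (both nodes power off once their image writes complete)
+ */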
+
+#include <linux/suspend.h>
+#include <linux/if.h>
+#include <linux/rtnetlink.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/if_arp.h>
+#include <linux/kthread.h>
+#include <linux/wait.h>
+#include <linux/netdevice.h>
+#include <net/ip.h>
+
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_io.h"
+
+#if 1
+#define PRINTK(a, b...) do { printk(a, ##b); } while (0)
+#else
+#define PRINTK(a, b...) do { } while (0)
+#endif
+
+static int loopback_mode;
+static int num_local_nodes = 1;
+#define MAX_LOCAL_NODES 8
+#define SADDR (loopback_mode ? b->sid : h->saddr)
+
+#define MYNAME "TuxOnIce Clustering"
+
+enum cluster_message {
+ MSG_ACK = 1,
+ MSG_NACK = 2,
+ MSG_PING = 4,
+ MSG_ABORT = 8,
+ MSG_BYE = 16,
+ MSG_HIBERNATE = 32,
+ MSG_IMAGE = 64,
+ MSG_IO = 128,
+ MSG_RUNNING = 256
+};
+
+static char *str_message(int message)
+{
+ switch (message) {
+ case MSG_PING:
+ return "Ping";
+ case MSG_ABORT:
+ return "Abort";
+ case MSG_ABORT | MSG_ACK:
+ return "Abort acked";
+ case MSG_ABORT | MSG_NACK:
+ return "Abort nacked";
+ case MSG_BYE:
+ return "Bye";
+ case MSG_BYE | MSG_ACK:
+ return "Bye acked";
+ case MSG_BYE | MSG_NACK:
+ return "Bye nacked";
+ case MSG_HIBERNATE:
+ return "Hibernate request";
+ case MSG_HIBERNATE | MSG_ACK:
+ return "Hibernate ack";
+ case MSG_HIBERNATE | MSG_NACK:
+ return "Hibernate nack";
+ case MSG_IMAGE:
+ return "Image exists?";
+ case MSG_IMAGE | MSG_ACK:
+ return "Image does exist";
+ case MSG_IMAGE | MSG_NACK:
+ return "No image here";
+ case MSG_IO:
+ return "I/O";
+ case MSG_IO | MSG_ACK:
+ return "I/O okay";
+ case MSG_IO | MSG_NACK:
+ return "I/O failed";
+ case MSG_RUNNING:
+ return "Running";
+ default:
+ printk(KERN_ERR "Unrecognised message %d.\n", message);
+ return "Unrecognised message (see dmesg)";
+ }
+}
+
+#define MSG_ACK_MASK (MSG_ACK | MSG_NACK)
+#define MSG_STATE_MASK (~MSG_ACK_MASK)
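+
+/*
+ * A minimal sketch (a hypothetical helper, not used elsewhere in this
+ * file) showing how a received message word splits into its state and
+ * acknowledgement parts with the masks above; toi_recv() below performs
+ * the same split inline.
+ */
+static inline void toi_split_message(int message, int *state, int *ack)
+{
+ *state = message & MSG_STATE_MASK; /* e.g. MSG_HIBERNATE */
+ *ack = message & MSG_ACK_MASK; /* 0, MSG_ACK or MSG_NACK */
+}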
+
+struct node_info {
+ struct list_head member_list;
+ wait_queue_head_t member_events;
+ spinlock_t member_list_lock;
+ spinlock_t receive_lock;
+ int peer_count, ignored_peer_count;
+ struct toi_sysfs_data sysfs_data;
+ enum cluster_message current_message;
+};
+
+struct node_info node_array[MAX_LOCAL_NODES];
+
+struct cluster_member {
+ __be32 addr;
+ enum cluster_message message;
+ struct list_head list;
+ int ignore;
+};
+
+#define toi_cluster_port_send 3501
+#define toi_cluster_port_recv 3502
+
+static struct net_device *net_dev;
+static struct toi_module_ops toi_cluster_ops;
+
+static int toi_recv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt, struct net_device *orig_dev);
+
+static struct packet_type toi_cluster_packet_type = {
+ .type = __constant_htons(ETH_P_IP),
+ .func = toi_recv,
+};
+
+struct toi_pkt { /* BOOTP packet format */
+ struct iphdr iph; /* IP header */
+ struct udphdr udph; /* UDP header */
+ u8 htype; /* HW address type */
+ u8 hlen; /* HW address length */
+ __be32 xid; /* Transaction ID */
+ __be16 secs; /* Seconds since we started */
+ __be16 flags; /* Just what it says */
+ u8 hw_addr[16]; /* Sender's HW address */
+ u16 message; /* Message */
+ unsigned long sid; /* Source ID for loopback testing */
+};
+
+static char toi_cluster_iface[IFNAMSIZ] = CONFIG_TOI_DEFAULT_CLUSTER_INTERFACE;
+
+static int added_pack;
+
+static int others_have_image;
+
+/* Key used to allow multiple clusters on the same lan */
+static char toi_cluster_key[32] = CONFIG_TOI_DEFAULT_CLUSTER_KEY;
+static char pre_hibernate_script[256] =
+ CONFIG_TOI_DEFAULT_CLUSTER_PRE_HIBERNATE;
+static char post_hibernate_script[256] =
+ CONFIG_TOI_DEFAULT_CLUSTER_POST_HIBERNATE;
+
+/* List of cluster members */
+static unsigned long continue_delay = 5 * HZ;
+static unsigned long cluster_message_timeout = 3 * HZ;
+
+/* === Membership list === */
+
+static void print_member_info(int index)
+{
+ struct cluster_member *this;
+
+ printk(KERN_INFO "==> Dumping node %d.\n", index);
+
+ list_for_each_entry(this, &node_array[index].member_list, list)
+ printk(KERN_INFO "%d.%d.%d.%d last message %s. %s\n",
+ NIPQUAD(this->addr),
+ str_message(this->message),
+ this->ignore ? "(Ignored)" : "");
+ printk(KERN_INFO "== Done ==\n");
+}
+
+static struct cluster_member *__find_member(int index, __be32 addr)
+{
+ struct cluster_member *this;
+
+ list_for_each_entry(this, &node_array[index].member_list, list) {
+ if (this->addr != addr)
+ continue;
+
+ return this;
+ }
+
+ return NULL;
+}
+
+static void set_ignore(int index, __be32 addr, struct cluster_member *this)
+{
+ if (this->ignore) {
+ PRINTK("Node %d already ignoring %d.%d.%d.%d.\n",
+ index, NIPQUAD(addr));
+ return;
+ }
+
+ PRINTK("Node %d sees node %d.%d.%d.%d now being ignored.\n",
+ index, NIPQUAD(addr));
+ this->ignore = 1;
+ node_array[index].ignored_peer_count++;
+}
+
+static int __add_update_member(int index, __be32 addr, int message)
+{
+ struct cluster_member *this;
+
+ this = __find_member(index, addr);
+ if (this) {
+ if (this->message != message) {
+ this->message = message;
+ if ((message & MSG_NACK) &&
+ (message & (MSG_HIBERNATE | MSG_IMAGE | MSG_IO)))
+ set_ignore(index, addr, this);
+ PRINTK("Node %d sees node %d.%d.%d.%d now sending "
+ "%s.\n", index, NIPQUAD(addr),
+ str_message(message));
+ wake_up(&node_array[index].member_events);
+ }
+ return 0;
+ }
+
+ /* Called under member_list_lock and from the receive path, so atomic */
+ this = (struct cluster_member *) toi_kzalloc(36,
+ sizeof(struct cluster_member), GFP_ATOMIC);
+
+ if (!this)
+ return -1;
+
+ this->addr = addr;
+ this->message = message;
+ this->ignore = 0;
+ INIT_LIST_HEAD(&this->list);
+
+ node_array[index].peer_count++;
+
+ PRINTK("Node %d sees node %d.%d.%d.%d sending %s.\n", index,
+ NIPQUAD(addr), str_message(message));
+
+ if ((message & MSG_NACK) &&
+ (message & (MSG_HIBERNATE | MSG_IMAGE | MSG_IO)))
+ set_ignore(index, addr, this);
+ list_add_tail(&this->list, &node_array[index].member_list);
+ return 1;
+}
+
+static int add_update_member(int index, __be32 addr, int message)
+{
+ int result;
+ unsigned long flags;
+ spin_lock_irqsave(&node_array[index].member_list_lock, flags);
+ result = __add_update_member(index, addr, message);
+ spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
+
+ print_member_info(index);
+
+ wake_up(&node_array[index].member_events);
+
+ return result;
+}
+
+static void del_member(int index, __be32 addr)
+{
+ struct cluster_member *this;
+ unsigned long flags;
+
+ spin_lock_irqsave(&node_array[index].member_list_lock, flags);
+ this = __find_member(index, addr);
+
+ if (this) {
+ list_del_init(&this->list);
+ toi_kfree(36, this, sizeof(*this));
+ node_array[index].peer_count--;
+ }
+
+ spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
+}
+
+/* === Message transmission === */
+
+static void toi_send_if(int message, unsigned long my_id);
+
+/*
+ * Process received TOI packet.
+ */
+static int toi_recv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt, struct net_device *orig_dev)
+{
+ struct toi_pkt *b;
+ struct iphdr *h;
+ int len, result, index;
+ unsigned long addr, message, ack;
+
+ /* Perform verifications before taking the lock. */
+ if (skb->pkt_type == PACKET_OTHERHOST)
+ goto drop;
+
+ if (dev != net_dev)
+ goto drop;
+
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (!skb)
+ return NET_RX_DROP;
+
+ if (!pskb_may_pull(skb,
+ sizeof(struct iphdr) +
+ sizeof(struct udphdr)))
+ goto drop;
+
+ b = (struct toi_pkt *)skb_network_header(skb);
+ h = &b->iph;
+
+ if (h->ihl != 5 || h->version != 4 || h->protocol != IPPROTO_UDP)
+ goto drop;
+
+ /* Fragments are not supported */
+ if (h->frag_off & htons(IP_OFFSET | IP_MF)) {
+ if (net_ratelimit())
+ printk(KERN_ERR "TuxOnIce: Ignoring fragmented "
+ "cluster message.\n");
+ goto drop;
+ }
+
+ if (skb->len < ntohs(h->tot_len))
+ goto drop;
+
+ if (ip_fast_csum((char *) h, h->ihl))
+ goto drop;
+
+ if (b->udph.source != htons(toi_cluster_port_send) ||
+ b->udph.dest != htons(toi_cluster_port_recv))
+ goto drop;
+
+ if (ntohs(h->tot_len) < ntohs(b->udph.len) + sizeof(struct iphdr))
+ goto drop;
+
+ len = ntohs(b->udph.len) - sizeof(struct udphdr);
+
+ /* Ok the front looks good, make sure we can get at the rest. */
+ if (!pskb_may_pull(skb, skb->len))
+ goto drop;
+
+ b = (struct toi_pkt *)skb_network_header(skb);
+ h = &b->iph;
+
+ addr = SADDR;
+ PRINTK(">>> Message %s received from " NIPQUAD_FMT ".\n",
+ str_message(b->message), NIPQUAD(addr));
+
+ message = b->message & MSG_STATE_MASK;
+ ack = b->message & MSG_ACK_MASK;
+
+ for (index = 0; index < num_local_nodes; index++) {
+ int new_message = node_array[index].current_message,
+ old_message = new_message;
+
+ if (index == SADDR || !old_message) {
+ PRINTK("Ignoring node %d (offline or self).\n", index);
+ continue;
+ }
+
+ /* One message at a time, please. */
+ spin_lock(&node_array[index].receive_lock);
+
+ result = add_update_member(index, SADDR, b->message);
+ if (result == -1) {
+ printk(KERN_INFO "Failed to add new cluster member "
+ NIPQUAD_FMT ".\n",
+ NIPQUAD(addr));
+ goto drop_unlock;
+ }
+
+ switch (b->message & MSG_STATE_MASK) {
+ case MSG_PING:
+ break;
+ case MSG_ABORT:
+ break;
+ case MSG_BYE:
+ break;
+ case MSG_HIBERNATE:
+ /* Can I hibernate? */
+ new_message = MSG_HIBERNATE |
+ ((index & 1) ? MSG_NACK : MSG_ACK);
+ break;
+ case MSG_IMAGE:
+ /* Can I resume? */
+ new_message = MSG_IMAGE |
+ ((index & 1) ? MSG_NACK : MSG_ACK);
+ if (new_message != old_message)
+ printk(KERN_ERR "Setting whether I can resume "
+ "to %d.\n", new_message);
+ break;
+ case MSG_IO:
+ new_message = MSG_IO | MSG_ACK;
+ break;
+ case MSG_RUNNING:
+ break;
+ default:
+ if (net_ratelimit())
+ printk(KERN_ERR "Unrecognised TuxOnIce cluster"
+ " message %d from " NIPQUAD_FMT ".\n",
+ b->message, NIPQUAD(addr));
+ }
+
+ if (old_message != new_message) {
+ node_array[index].current_message = new_message;
+ printk(KERN_INFO ">>> Sending new message for node "
+ "%d.\n", index);
+ toi_send_if(new_message, index);
+ } else if (!ack) {
+ printk(KERN_INFO ">>> Resending message for node %d.\n",
+ index);
+ toi_send_if(new_message, index);
+ }
+drop_unlock:
+ spin_unlock(&node_array[index].receive_lock);
+ }
+
+drop:
+ /* Throw the packet out. */
+ kfree_skb(skb);
+
+ return 0;
+}
+
+/*
+ * Send cluster message to single interface.
+ */
+static void toi_send_if(int message, unsigned long my_id)
+{
+ struct sk_buff *skb;
+ struct toi_pkt *b;
+ int hh_len = LL_RESERVED_SPACE(net_dev);
+ struct iphdr *h;
+
+ /* Allocate packet (atomic: may run from the packet receive path) */
+ skb = alloc_skb(sizeof(struct toi_pkt) + hh_len + 15, GFP_ATOMIC);
+ if (!skb)
+ return;
+ skb_reserve(skb, hh_len);
+ b = (struct toi_pkt *) skb_put(skb, sizeof(struct toi_pkt));
+ memset(b, 0, sizeof(struct toi_pkt));
+
+ /* Construct IP header */
+ skb_reset_network_header(skb);
+ h = ip_hdr(skb);
+ h->version = 4;
+ h->ihl = 5;
+ h->tot_len = htons(sizeof(struct toi_pkt));
+ h->frag_off = htons(IP_DF);
+ h->ttl = 64;
+ h->protocol = IPPROTO_UDP;
+ h->daddr = htonl(INADDR_BROADCAST);
+ h->check = ip_fast_csum((unsigned char *) h, h->ihl);
+
+ /* Construct UDP header */
+ b->udph.source = htons(toi_cluster_port_send);
+ b->udph.dest = htons(toi_cluster_port_recv);
+ b->udph.len = htons(sizeof(struct toi_pkt) - sizeof(struct iphdr));
+ /* UDP checksum not calculated -- explicitly allowed in BOOTP RFC */
+
+ /* Construct message */
+ b->message = message;
+ b->sid = my_id;
+ b->htype = net_dev->type; /* dev->type is 16 bits; values above 255 are truncated */
+ b->hlen = net_dev->addr_len;
+ memcpy(b->hw_addr, net_dev->dev_addr, net_dev->addr_len);
+ b->secs = htons(3); /* 3 seconds */
+
+ /* Chain packet down the line... */
+ skb->dev = net_dev;
+ skb->protocol = htons(ETH_P_IP);
+ if ((dev_hard_header(skb, net_dev, ntohs(skb->protocol),
+ net_dev->broadcast, net_dev->dev_addr, skb->len) < 0) ||
+ dev_queue_xmit(skb) < 0)
+ printk(KERN_INFO "E");
+}
+
+/* ========================================= */
+
+/* kTOICluster */
+
+static atomic_t num_cluster_threads;
+static DECLARE_WAIT_QUEUE_HEAD(clusterd_events);
+
+static int kTOICluster(void *data)
+{
+ unsigned long my_id;
+
+ my_id = atomic_add_return(1, &num_cluster_threads) - 1;
+ node_array[my_id].current_message = (unsigned long) data;
+
+ PRINTK("kTOICluster daemon %lu starting.\n", my_id);
+
+ current->flags |= PF_NOFREEZE;
+
+ while (node_array[my_id].current_message) {
+ toi_send_if(node_array[my_id].current_message, my_id);
+ sleep_on_timeout(&clusterd_events,
+ cluster_message_timeout);
+ PRINTK("Link state %lu is %d.\n", my_id,
+ node_array[my_id].current_message);
+ }
+
+ toi_send_if(MSG_BYE, my_id);
+ atomic_dec(&num_cluster_threads);
+ wake_up(&clusterd_events);
+
+ PRINTK("kTOICluster daemon %lu exiting.\n", my_id);
+ __set_current_state(TASK_RUNNING);
+ return 0;
+}
+
+static void kill_clusterd(void)
+{
+ int i;
+
+ for (i = 0; i < num_local_nodes; i++) {
+ if (node_array[i].current_message) {
+ PRINTK("Seeking to kill clusterd %d.\n", i);
+ node_array[i].current_message = 0;
+ }
+ }
+ wait_event(clusterd_events,
+ !atomic_read(&num_cluster_threads));
+ PRINTK("All cluster daemons have exited.\n");
+}
+
+static int peers_not_in_message(int index, int message, int precise)
+{
+ struct cluster_member *this;
+ unsigned long flags;
+ int result = 0;
+
+ spin_lock_irqsave(&node_array[index].member_list_lock, flags);
+ list_for_each_entry(this, &node_array[index].member_list, list) {
+ if (this->ignore)
+ continue;
+
+ PRINTK("Peer %d.%d.%d.%d sending %s. "
+ "Seeking %s.\n",
+ NIPQUAD(this->addr),
+ str_message(this->message), str_message(message));
+ if ((precise ? this->message :
+ this->message & MSG_STATE_MASK) !=
+ message)
+ result++;
+ }
+ spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
+ PRINTK("%d peers not yet in sought message.\n", result);
+ return result;
+}
+
+static void reset_ignored(int index)
+{
+ struct cluster_member *this;
+ unsigned long flags;
+
+ spin_lock_irqsave(&node_array[index].member_list_lock, flags);
+ list_for_each_entry(this, &node_array[index].member_list, list)
+ this->ignore = 0;
+ node_array[index].ignored_peer_count = 0;
+ spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
+}
+
+static int peers_in_message(int index, int message, int precise)
+{
+ return node_array[index].peer_count -
+ node_array[index].ignored_peer_count -
+ peers_not_in_message(index, message, precise);
+}
+
+static int time_to_continue(int index, unsigned long start, int message)
+{
+ int first = peers_not_in_message(index, message, 0);
+ int second = peers_in_message(index, message, 1);
+
+ PRINTK("First part returns %d, second returns %d.\n", first, second);
+
+ if (!first && !second) {
+ PRINTK("All peers answered message %d.\n",
+ message);
+ return 1;
+ }
+
+ if (time_after(jiffies, start + continue_delay)) {
+ PRINTK("Timeout reached.\n");
+ return 1;
+ }
+
+ PRINTK("Not time to continue yet (%lu < %lu).\n", jiffies,
+ start + continue_delay);
+ return 0;
+}
+
+void toi_initiate_cluster_hibernate(void)
+{
+ int result;
+ unsigned long start;
+
+ result = do_toi_step(STEP_HIBERNATE_PREPARE_IMAGE);
+ if (result)
+ return;
+
+ toi_send_if(MSG_HIBERNATE, 0);
+
+ start = jiffies;
+ wait_event(node_array[0].member_events,
+ time_to_continue(0, start, MSG_HIBERNATE));
+
+ if (test_action_state(TOI_FREEZER_TEST)) {
+ toi_send_if(MSG_ABORT, 0);
+
+ start = jiffies;
+ wait_event(node_array[0].member_events,
+ time_to_continue(0, start, MSG_RUNNING));
+
+ do_toi_step(STEP_QUIET_CLEANUP);
+ return;
+ }
+
+ toi_send_if(MSG_IO, 0);
+
+ result = do_toi_step(STEP_HIBERNATE_SAVE_IMAGE);
+ if (result)
+ return;
+
+ /* This code runs at resume time too! */
+ if (toi_in_hibernate)
+ result = do_toi_step(STEP_HIBERNATE_POWERDOWN);
+}
+
+/* toi_cluster_print_debug_stats
+ *
+ * Description: Print information to be recorded for debugging purposes into a
+ * buffer.
+ * Arguments: buffer: Pointer to a buffer into which the debug info will be
+ * printed.
+ * size: Size of the buffer.
+ * Returns: Number of characters written to the buffer.
+ */
+static int toi_cluster_print_debug_stats(char *buffer, int size)
+{
+ int len;
+
+ if (strlen(toi_cluster_iface))
+ len = scnprintf(buffer, size,
+ "- Cluster interface is '%s'.\n",
+ toi_cluster_iface);
+ else
+ len = scnprintf(buffer, size,
+ "- Cluster support is disabled.\n");
+ return len;
+}
+
+/* cluster_memory_needed
+ *
+ * Description: Tell the caller how much memory we need to operate during
+ * hibernate/resume.
+ * Returns: Maximum number of bytes of memory required for operation.
+ */
+static int toi_cluster_memory_needed(void)
+{
+ return 0;
+}
+
+static int toi_cluster_storage_needed(void)
+{
+ return 1 + strlen(toi_cluster_iface);
+}
+
+/* toi_cluster_save_config_info
+ *
+ * Description: Save information needed when reloading the image at resume time.
+ * Arguments: Buffer: Pointer to a buffer of size PAGE_SIZE.
+ * Returns: Number of bytes used for saving our data.
+ */
+static int toi_cluster_save_config_info(char *buffer)
+{
+ strcpy(buffer, toi_cluster_iface);
+ return strlen(toi_cluster_iface) + 1;
+}
+
+/* toi_cluster_load_config_info
+ *
+ * Description: Reload information needed for declustering the image at
+ * resume time.
+ * Arguments: Buffer: Pointer to the start of the data.
+ * Size: Number of bytes that were saved.
+ */
+static void toi_cluster_load_config_info(char *buffer, int size)
+{
+ strncpy(toi_cluster_iface, buffer, size);
+ return;
+}
+
+static void cluster_startup(void)
+{
+ int have_image = do_check_can_resume(), i;
+ unsigned long start = jiffies, initial_message;
+ struct task_struct *p;
+
+ initial_message = MSG_IMAGE;
+
+ have_image = 1;
+
+ for (i = 0; i < num_local_nodes; i++) {
+ PRINTK("Starting ktoiclusterd %d.\n", i);
+ p = kthread_create(kTOICluster, (void *) initial_message,
+ "ktoiclusterd/%d", i);
+ if (IS_ERR(p)) {
+ printk(KERN_ERR "Failed to start ktoiclusterd.\n");
+ return;
+ }
+
+ wake_up_process(p);
+ }
+
+ /* Wait for delay or someone else sending first message */
+ wait_event(node_array[0].member_events, time_to_continue(0, start,
+ MSG_IMAGE));
+
+ others_have_image = peers_in_message(0, MSG_IMAGE | MSG_ACK, 1);
+
+ printk(KERN_INFO "Continuing. I %shave an image. Peers with image:"
+ " %d.\n", have_image ? "" : "don't ", others_have_image);
+
+ if (have_image) {
+ int result;
+
+ /* Start to resume */
+ printk(KERN_INFO " === Starting to resume === \n");
+ node_array[0].current_message = MSG_IO;
+ toi_send_if(MSG_IO, 0);
+
+ /* result = do_toi_step(STEP_RESUME_LOAD_PS1); */
+ result = 0;
+
+ if (!result) {
+ /*
+ * Atomic restore - we'll come back in the hibernation
+ * path.
+ */
+
+ /* result = do_toi_step(STEP_RESUME_DO_RESTORE); */
+ result = 0;
+
+ /* do_toi_step(STEP_QUIET_CLEANUP); */
+ }
+
+ node_array[0].current_message |= MSG_NACK;
+
+ /* For debugging - disable for real life? */
+ wait_event(node_array[0].member_events,
+ time_to_continue(0, start, MSG_IO));
+ }
+
+ if (others_have_image) {
+ /* Wait for them to resume */
+ printk(KERN_INFO "Waiting for other nodes to resume.\n");
+ start = jiffies;
+ wait_event(node_array[0].member_events,
+ time_to_continue(0, start, MSG_RUNNING));
+ if (peers_not_in_message(0, MSG_RUNNING, 0))
+ printk(KERN_INFO "Timed out while waiting for other "
+ "nodes to resume.\n");
+ }
+
+ /* Find out whether an image exists here. Send ACK_IMAGE or NACK_IMAGE
+ * as appropriate.
+ *
+ * If we don't have an image:
+ * - Wait until someone else says they have one, or conditions are met
+ * for continuing to boot (n machines or t seconds).
+ * - If anyone has an image, wait for them to resume before continuing
+ * to boot.
+ *
+ * If we have an image:
+ * - Wait until conditions are met before continuing to resume (n
+ * machines or t seconds). Send RESUME_PREP and freeze processes.
+ * NACK_PREP if freezing fails (shouldn't) and follow logic for
+ * us having no image above. On success, wait for [N]ACK_PREP from
+ * other machines. Read image (including atomic restore) until done.
+ * Wait for ACK_READ from others (should never fail). Thaw processes
+ * and do post-resume. (The section after the atomic restore is done
+ * via the code for hibernating).
+ */
+
+ node_array[0].current_message = MSG_RUNNING;
+}
+
+/* toi_cluster_open_iface
+ *
+ * Description: Prepare to use an interface.
+ */
+
+static int toi_cluster_open_iface(void)
+{
+ struct net_device *dev;
+
+ rtnl_lock();
+
+ for_each_netdev(&init_net, dev) {
+ if (/* dev == &init_net.loopback_dev || */
+ strcmp(dev->name, toi_cluster_iface))
+ continue;
+
+ net_dev = dev;
+ break;
+ }
+
+ rtnl_unlock();
+
+ if (!net_dev) {
+ printk(KERN_ERR MYNAME ": Device %s not found.\n",
+ toi_cluster_iface);
+ return -ENODEV;
+ }
+
+ dev_add_pack(&toi_cluster_packet_type);
+ added_pack = 1;
+
+ loopback_mode = (net_dev == init_net.loopback_dev);
+ num_local_nodes = loopback_mode ? 8 : 1;
+
+ PRINTK("Loopback mode is %s. Number of local nodes is %d.\n",
+ loopback_mode ? "on" : "off", num_local_nodes);
+
+ cluster_startup();
+ return 0;
+}
+
+/* toi_cluster_close_iface
+ *
+ * Description: Stop using an interface.
+ */
+
+static int toi_cluster_close_iface(void)
+{
+ kill_clusterd();
+ if (added_pack) {
+ dev_remove_pack(&toi_cluster_packet_type);
+ added_pack = 0;
+ }
+ return 0;
+}
+
+static void write_side_effect(void)
+{
+ if (toi_cluster_ops.enabled) {
+ toi_cluster_open_iface();
+ set_toi_state(TOI_CLUSTER_MODE);
+ } else {
+ toi_cluster_close_iface();
+ clear_toi_state(TOI_CLUSTER_MODE);
+ }
+}
+
+static void node_write_side_effect(void)
+{
+}
+
+/*
+ * data for our sysfs entries.
+ */
+static struct toi_sysfs_data sysfs_params[] = {
+ SYSFS_STRING("interface", SYSFS_RW, toi_cluster_iface, IFNAMSIZ, 0,
+ NULL),
+ SYSFS_INT("enabled", SYSFS_RW, &toi_cluster_ops.enabled, 0, 1, 0,
+ write_side_effect),
+ SYSFS_STRING("cluster_name", SYSFS_RW, toi_cluster_key, 32, 0, NULL),
+ SYSFS_STRING("pre-hibernate-script", SYSFS_RW, pre_hibernate_script,
+ 256, 0, NULL),
+ SYSFS_STRING("post-hibernate-script", SYSFS_RW, post_hibernate_script,
+ 256, 0, NULL),
+ SYSFS_UL("continue_delay", SYSFS_RW, &continue_delay, HZ / 2, 60 * HZ,
+ 0)
+};
+
+/*
+ * Ops structure.
+ */
+
+static struct toi_module_ops toi_cluster_ops = {
+ .type = FILTER_MODULE,
+ .name = "Cluster",
+ .directory = "cluster",
+ .module = THIS_MODULE,
+ .memory_needed = toi_cluster_memory_needed,
+ .print_debug_info = toi_cluster_print_debug_stats,
+ .save_config_info = toi_cluster_save_config_info,
+ .load_config_info = toi_cluster_load_config_info,
+ .storage_needed = toi_cluster_storage_needed,
+
+ .sysfs_data = sysfs_params,
+ .num_sysfs_entries = sizeof(sysfs_params) /
+ sizeof(struct toi_sysfs_data),
+};
+
+/* ---- Registration ---- */
+
+#ifdef MODULE
+#define INIT static __init
+#define EXIT static __exit
+#else
+#define INIT
+#define EXIT
+#endif
+
+INIT int toi_cluster_init(void)
+{
+ int temp = toi_register_module(&toi_cluster_ops), i;
+ struct kobject *kobj = toi_cluster_ops.dir_kobj;
+
+ for (i = 0; i < MAX_LOCAL_NODES; i++) {
+ node_array[i].current_message = 0;
+ INIT_LIST_HEAD(&node_array[i].member_list);
+ init_waitqueue_head(&node_array[i].member_events);
+ spin_lock_init(&node_array[i].member_list_lock);
+ spin_lock_init(&node_array[i].receive_lock);
+
+ /* Set up sysfs entry */
+ node_array[i].sysfs_data.attr.name = toi_kzalloc(8,
+ 16, GFP_KERNEL);
+ if (!node_array[i].sysfs_data.attr.name)
+ continue;
+ snprintf((char *) node_array[i].sysfs_data.attr.name, 16,
+ "node_%d", i);
+ node_array[i].sysfs_data.attr.mode = SYSFS_RW;
+ node_array[i].sysfs_data.type = TOI_SYSFS_DATA_INTEGER;
+ node_array[i].sysfs_data.flags = 0;
+ node_array[i].sysfs_data.data.integer.variable =
+ (int *) &node_array[i].current_message;
+ node_array[i].sysfs_data.data.integer.minimum = 0;
+ node_array[i].sysfs_data.data.integer.maximum = INT_MAX;
+ node_array[i].sysfs_data.write_side_effect =
+ node_write_side_effect;
+ toi_register_sysfs_file(kobj, &node_array[i].sysfs_data);
+ }
+
+ toi_cluster_ops.enabled = (strlen(toi_cluster_iface) > 0);
+
+ if (toi_cluster_ops.enabled)
+ toi_cluster_open_iface();
+
+ return temp;
+}
+
+EXIT void toi_cluster_exit(void)
+{
+ int i;
+ toi_cluster_close_iface();
+
+ for (i = 0; i < MAX_LOCAL_NODES; i++)
+ toi_unregister_sysfs_file(toi_cluster_ops.dir_kobj,
+ &node_array[i].sysfs_data);
+ toi_unregister_module(&toi_cluster_ops);
+}
+
+static int __init toi_cluster_iface_setup(char *iface)
+{
+ toi_cluster_ops.enabled = (*iface &&
+ strcmp(iface, "off"));
+
+ if (toi_cluster_ops.enabled)
+ strlcpy(toi_cluster_iface, iface, IFNAMSIZ);
+
+ return 1;
+}
+
+__setup("toi_cluster=", toi_cluster_iface_setup);
diff --git a/kernel/power/tuxonice_cluster.h b/kernel/power/tuxonice_cluster.h
new file mode 100644
index 000000000..84356b304
--- /dev/null
+++ b/kernel/power/tuxonice_cluster.h
@@ -0,0 +1,18 @@
+/*
+ * kernel/power/tuxonice_cluster.h
+ *
+ * Copyright (C) 2006-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ */
+
+#ifdef CONFIG_TOI_CLUSTER
+extern int toi_cluster_init(void);
+extern void toi_cluster_exit(void);
+extern void toi_initiate_cluster_hibernate(void);
+#else
+static inline int toi_cluster_init(void) { return 0; }
+static inline void toi_cluster_exit(void) { }
+static inline void toi_initiate_cluster_hibernate(void) { }
+#endif
+
diff --git a/kernel/power/tuxonice_compress.c b/kernel/power/tuxonice_compress.c
new file mode 100644
index 000000000..d118568b7
--- /dev/null
+++ b/kernel/power/tuxonice_compress.c
@@ -0,0 +1,452 @@
+/*
+ * kernel/power/tuxonice_compress.c
+ *
+ * Copyright (C) 2003-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * This file contains data compression routines for TuxOnIce,
+ * using cryptoapi.
+ */
+
+#include <linux/suspend.h>
+#include <linux/highmem.h>
+#include <linux/vmalloc.h>
+#include <linux/crypto.h>
+
+#include "tuxonice_builtin.h"
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_io.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_alloc.h"
+
+static int toi_expected_compression;
+
+static struct toi_module_ops toi_compression_ops;
+static struct toi_module_ops *next_driver;
+
+static char toi_compressor_name[32] = "lzo";
+
+static DEFINE_MUTEX(stats_lock);
+
+struct cpu_context {
+ u8 *page_buffer;
+ struct crypto_comp *transform;
+ unsigned int len;
+ u8 *buffer_start;
+ u8 *output_buffer;
+};
+
+#define OUT_BUF_SIZE (2 * PAGE_SIZE)
+
+static DEFINE_PER_CPU(struct cpu_context, contexts);
+
+/*
+ * toi_crypto_prepare
+ *
+ * Prepare to do some work by allocating buffers and transforms.
+ */
+static int toi_compress_crypto_prepare(void)
+{
+ int cpu;
+
+ if (!*toi_compressor_name) {
+ printk(KERN_INFO "TuxOnIce: Compression enabled but no "
+ "compressor name set.\n");
+ return 1;
+ }
+
+ for_each_online_cpu(cpu) {
+ struct cpu_context *this = &per_cpu(contexts, cpu);
+ this->transform = crypto_alloc_comp(toi_compressor_name, 0, 0);
+ if (IS_ERR(this->transform)) {
+ printk(KERN_INFO "TuxOnIce: Failed to initialise the "
+ "%s compression transform.\n",
+ toi_compressor_name);
+ this->transform = NULL;
+ return 1;
+ }
+
+ this->page_buffer =
+ (char *) toi_get_zeroed_page(16, TOI_ATOMIC_GFP);
+
+ if (!this->page_buffer) {
+ printk(KERN_ERR
+ "Failed to allocate a page buffer for TuxOnIce "
+ "compression driver.\n");
+ return -ENOMEM;
+ }
+
+ this->output_buffer =
+ (char *) vmalloc_32(OUT_BUF_SIZE);
+
+ if (!this->output_buffer) {
+ printk(KERN_ERR
+ "Failed to allocate an output buffer for TuxOnIce "
+ "compression driver.\n");
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+static int toi_compress_rw_cleanup(int writing)
+{
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ struct cpu_context *this = &per_cpu(contexts, cpu);
+ if (this->transform) {
+ crypto_free_comp(this->transform);
+ this->transform = NULL;
+ }
+
+ if (this->page_buffer)
+ toi_free_page(16, (unsigned long) this->page_buffer);
+
+ this->page_buffer = NULL;
+
+ if (this->output_buffer)
+ vfree(this->output_buffer);
+
+ this->output_buffer = NULL;
+ }
+
+ return 0;
+}
+
+/*
+ * toi_compress_init
+ */
+
+static int toi_compress_init(int toi_or_resume)
+{
+ if (!toi_or_resume)
+ return 0;
+
+ toi_compress_bytes_in = 0;
+ toi_compress_bytes_out = 0;
+
+ next_driver = toi_get_next_filter(&toi_compression_ops);
+
+ return next_driver ? 0 : -ECHILD;
+}
+
+/*
+ * toi_compress_rw_init()
+ */
+
+static int toi_compress_rw_init(int rw, int stream_number)
+{
+ if (toi_compress_crypto_prepare()) {
+ printk(KERN_ERR "Failed to initialise compression "
+ "algorithm.\n");
+ if (rw == READ) {
+ printk(KERN_INFO "Unable to read the image.\n");
+ return -ENODEV;
+ } else {
+ printk(KERN_INFO "Continuing without "
+ "compressing the image.\n");
+ toi_compression_ops.enabled = 0;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * toi_compress_write_page()
+ *
+ * Compress a page of data, buffering output and passing on filled
+ * pages to the next module in the pipeline.
+ *
+ * Buffer_page: Pointer to a buffer of size PAGE_SIZE, containing
+ * data to be compressed.
+ *
+ * Returns: 0 on success. Otherwise the error is that returned by later
+ * modules, -ECHILD if we have a broken pipeline or -EIO if
+ * zlib errs.
+ */
+static int toi_compress_write_page(unsigned long index, int buf_type,
+ void *buffer_page, unsigned int buf_size)
+{
+ int ret = 0, cpu = smp_processor_id();
+ struct cpu_context *ctx = &per_cpu(contexts, cpu);
+ u8 *output_buffer = buffer_page;
+ int output_len = buf_size;
+ int out_buf_type = buf_type;
+
+ if (ctx->transform) {
+
+ ctx->buffer_start = TOI_MAP(buf_type, buffer_page);
+ ctx->len = OUT_BUF_SIZE;
+
+ ret = crypto_comp_compress(ctx->transform,
+ ctx->buffer_start, buf_size,
+ ctx->output_buffer, &ctx->len);
+
+ TOI_UNMAP(buf_type, buffer_page);
+
+ toi_message(TOI_COMPRESS, TOI_VERBOSE, 0,
+ "CPU %d, index %lu: %d bytes",
+ cpu, index, ctx->len);
+
+ if (!ret && ctx->len < buf_size) { /* some compression */
+ output_buffer = ctx->output_buffer;
+ output_len = ctx->len;
+ out_buf_type = TOI_VIRT;
+ }
+
+ }
+
+ mutex_lock(&stats_lock);
+
+ toi_compress_bytes_in += buf_size;
+ toi_compress_bytes_out += output_len;
+
+ mutex_unlock(&stats_lock);
+
+ if (!ret)
+ ret = next_driver->write_page(index, out_buf_type,
+ output_buffer, output_len);
+
+ return ret;
+}
+
+/*
+ * toi_compress_read_page()
+ * @buffer_page: struct page *. Pointer to a buffer of size PAGE_SIZE.
+ *
+ * Retrieve data from later modules and decompress it until the input buffer
+ * is filled.
+ * Zero if successful. Error condition from me or from downstream on failure.
+ */
+static int toi_compress_read_page(unsigned long *index, int buf_type,
+ void *buffer_page, unsigned int *buf_size)
+{
+ int ret, cpu = smp_processor_id();
+ unsigned int len;
+ unsigned int outlen = PAGE_SIZE;
+ char *buffer_start;
+ struct cpu_context *ctx = &per_cpu(contexts, cpu);
+
+ if (!ctx->transform)
+ return next_driver->read_page(index, TOI_PAGE, buffer_page,
+ buf_size);
+
+ /*
+ * All our reads must be synchronous - we can't decompress
+ * data that hasn't been read yet.
+ */
+
+ ret = next_driver->read_page(index, TOI_VIRT, ctx->page_buffer, &len);
+
+ buffer_start = TOI_MAP(buf_type, buffer_page);
+
+ /* Error or uncompressed data */
+ if (ret || len == PAGE_SIZE) {
+ memcpy(buffer_start, ctx->page_buffer, len);
+ goto out;
+ }
+
+ ret = crypto_comp_decompress(
+ ctx->transform,
+ ctx->page_buffer,
+ len, buffer_start, &outlen);
+
+ toi_message(TOI_COMPRESS, TOI_VERBOSE, 0,
+ "CPU %d, index %lu: %d=>%d (%d).",
+ cpu, *index, len, outlen, ret);
+
+ if (ret)
+ abort_hibernate(TOI_FAILED_IO,
+ "Compress_read returned %d.\n", ret);
+ else if (outlen != PAGE_SIZE) {
+ abort_hibernate(TOI_FAILED_IO,
+ "Decompression yielded %d bytes instead of %ld.\n",
+ outlen, PAGE_SIZE);
+ printk(KERN_ERR "Decompression yielded %d bytes instead of "
+ "%ld.\n", outlen, PAGE_SIZE);
+ ret = -EIO;
+ *buf_size = outlen;
+ }
+out:
+ TOI_UNMAP(buf_type, buffer_page);
+ return ret;
+}
+
+/*
+ * toi_compress_print_debug_stats
+ * @buffer: Pointer to a buffer into which the debug info will be printed.
+ * @size: Size of the buffer.
+ *
+ * Print information to be recorded for debugging purposes into a buffer.
+ * Returns: Number of characters written to the buffer.
+ */
+
+static int toi_compress_print_debug_stats(char *buffer, int size)
+{
+ unsigned long pages_in = toi_compress_bytes_in >> PAGE_SHIFT,
+ pages_out = toi_compress_bytes_out >> PAGE_SHIFT;
+ int len;
+
+ /* Output the compression ratio achieved. */
+ if (*toi_compressor_name)
+ len = scnprintf(buffer, size, "- Compressor is '%s'.\n",
+ toi_compressor_name);
+ else
+ len = scnprintf(buffer, size, "- Compressor is not set.\n");
+
+ if (pages_in)
+ len += scnprintf(buffer+len, size - len, " Compressed "
+ "%lu bytes into %lu (%ld percent compression).\n",
+ toi_compress_bytes_in,
+ toi_compress_bytes_out,
+ (pages_in - pages_out) * 100 / pages_in);
+ return len;
+}
+
+/*
+ * toi_compress_compression_memory_needed
+ *
+ * Tell the caller how much memory we need to operate during hibernate/resume.
+ * Returns: Maximum number of bytes of memory required for operation.
+ */
+static int toi_compress_memory_needed(void)
+{
+ return 2 * PAGE_SIZE;
+}
+
+static int toi_compress_storage_needed(void)
+{
+ return 2 * sizeof(unsigned long) + 2 * sizeof(int) +
+ strlen(toi_compressor_name) + 1;
+}
+
+/*
+ * toi_compress_save_config_info
+ * @buffer: Pointer to a buffer of size PAGE_SIZE.
+ *
+ * Save information needed when reloading the image at resume time.
+ * Returns: Number of bytes used for saving our data.
+ */
+static int toi_compress_save_config_info(char *buffer)
+{
+ int len = strlen(toi_compressor_name) + 1, offset = 0;
+
+ *((unsigned long *) buffer) = toi_compress_bytes_in;
+ offset += sizeof(unsigned long);
+ *((unsigned long *) (buffer + offset)) = toi_compress_bytes_out;
+ offset += sizeof(unsigned long);
+ *((int *) (buffer + offset)) = toi_expected_compression;
+ offset += sizeof(int);
+ *((int *) (buffer + offset)) = len;
+ offset += sizeof(int);
+ strncpy(buffer + offset, toi_compressor_name, len);
+ return offset + len;
+}
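+
+/*
+ * For illustration only (assuming a 64-bit unsigned long), the blob
+ * produced by toi_compress_save_config_info() above is laid out as:
+ *
+ * offset 0: toi_compress_bytes_in (unsigned long, 8 bytes)
+ * offset 8: toi_compress_bytes_out (unsigned long, 8 bytes)
+ * offset 16: toi_expected_compression (int, 4 bytes)
+ * offset 20: len (int, 4 bytes)
+ * offset 24: toi_compressor_name (len bytes, including the NUL)
+ *
+ * toi_compress_load_config_info() below unpacks it in the same order.
+ */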
+
+/* toi_compress_load_config_info
+ * @buffer: Pointer to the start of the data.
+ * @size: Number of bytes that were saved.
+ *
+ * Description: Reload information needed for decompressing the image at
+ * resume time.
+ */
+static void toi_compress_load_config_info(char *buffer, int size)
+{
+ int len, offset = 0;
+
+ toi_compress_bytes_in = *((unsigned long *) buffer);
+ offset += sizeof(unsigned long);
+ toi_compress_bytes_out = *((unsigned long *) (buffer + offset));
+ offset += sizeof(unsigned long);
+ toi_expected_compression = *((int *) (buffer + offset));
+ offset += sizeof(int);
+ len = *((int *) (buffer + offset));
+ offset += sizeof(int);
+ strncpy(toi_compressor_name, buffer + offset, len);
+}
+
+static void toi_compress_pre_atomic_restore(struct toi_boot_kernel_data *bkd)
+{
+ bkd->compress_bytes_in = toi_compress_bytes_in;
+ bkd->compress_bytes_out = toi_compress_bytes_out;
+}
+
+static void toi_compress_post_atomic_restore(struct toi_boot_kernel_data *bkd)
+{
+ toi_compress_bytes_in = bkd->compress_bytes_in;
+ toi_compress_bytes_out = bkd->compress_bytes_out;
+}
+
+/*
+ * toi_expected_compression_ratio
+ *
+ * Description: Returns the expected ratio between data passed into this module
+ * and the amount of data output when writing.
+ * Returns: 100 if the module is disabled. Otherwise the value set by the
+ * user via our sysfs entry.
+ */
+
+static int toi_compress_expected_ratio(void)
+{
+ if (!toi_compression_ops.enabled)
+ return 100;
+ else
+ return 100 - toi_expected_compression;
+}
+
+/*
+ * data for our sysfs entries.
+ */
+static struct toi_sysfs_data sysfs_params[] = {
+ SYSFS_INT("expected_compression", SYSFS_RW, &toi_expected_compression,
+ 0, 99, 0, NULL),
+ SYSFS_INT("enabled", SYSFS_RW, &toi_compression_ops.enabled, 0, 1, 0,
+ NULL),
+ SYSFS_STRING("algorithm", SYSFS_RW, toi_compressor_name, 31, 0, NULL),
+};
+
+/*
+ * Ops structure.
+ */
+static struct toi_module_ops toi_compression_ops = {
+ .type = FILTER_MODULE,
+ .name = "compression",
+ .directory = "compression",
+ .module = THIS_MODULE,
+ .initialise = toi_compress_init,
+ .memory_needed = toi_compress_memory_needed,
+ .print_debug_info = toi_compress_print_debug_stats,
+ .save_config_info = toi_compress_save_config_info,
+ .load_config_info = toi_compress_load_config_info,
+ .storage_needed = toi_compress_storage_needed,
+ .expected_compression = toi_compress_expected_ratio,
+
+ .pre_atomic_restore = toi_compress_pre_atomic_restore,
+ .post_atomic_restore = toi_compress_post_atomic_restore,
+
+ .rw_init = toi_compress_rw_init,
+ .rw_cleanup = toi_compress_rw_cleanup,
+
+ .write_page = toi_compress_write_page,
+ .read_page = toi_compress_read_page,
+
+ .sysfs_data = sysfs_params,
+ .num_sysfs_entries = sizeof(sysfs_params) /
+ sizeof(struct toi_sysfs_data),
+};
+
+/* ---- Registration ---- */
+
+static __init int toi_compress_load(void)
+{
+ return toi_register_module(&toi_compression_ops);
+}
+
+late_initcall(toi_compress_load);
diff --git a/kernel/power/tuxonice_copy_before_write.c b/kernel/power/tuxonice_copy_before_write.c
new file mode 100644
index 000000000..dc02a4acf
--- /dev/null
+++ b/kernel/power/tuxonice_copy_before_write.c
@@ -0,0 +1,240 @@
+/*
+ * kernel/power/tuxonice_copy_before_write.c
+ *
+ * Copyright (C) 2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Routines (apart from the fault handling code) to deal with allocating memory
+ * for copying pages before they are modified, restoring the contents and getting
+ * the contents written to disk.
+ */
+
+#include <linux/percpu-defs.h>
+#include <linux/sched.h>
+#include <linux/tuxonice.h>
+#include "tuxonice_alloc.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice.h"
+
+DEFINE_PER_CPU(struct toi_cbw_state, toi_cbw_states);
+#define CBWS_PER_PAGE (PAGE_SIZE / sizeof(struct toi_cbw))
+#define toi_cbw_pool_size 100
+
+static void _toi_free_cbw_data(struct toi_cbw_state *state)
+{
+ struct toi_cbw *page_ptr, *ptr, *next;
+
+ page_ptr = ptr = state->first;
+
+ while (ptr) {
+ next = ptr->next;
+
+ if (ptr->virt) {
+ toi__free_page(40, virt_to_page(ptr->virt));
+ }
+ if ((((unsigned long) ptr) & PAGE_MASK) != (unsigned long) page_ptr) {
+ /* Must be on a new page - free the previous one. */
+ toi__free_page(40, virt_to_page(page_ptr));
+ page_ptr = ptr;
+ }
+ ptr = next;
+ }
+
+ if (page_ptr) {
+ toi__free_page(40, virt_to_page(page_ptr));
+ }
+
+ state->first = state->next = state->last = NULL;
+ state->size = 0;
+}
+
+void toi_free_cbw_data(void)
+{
+ int i;
+
+ for_each_online_cpu(i) {
+ struct toi_cbw_state *state = &per_cpu(toi_cbw_states, i);
+
+ if (!state->first)
+ continue;
+
+ state->enabled = 0;
+
+ while (state->active) {
+ schedule();
+ }
+
+ _toi_free_cbw_data(state);
+ }
+}
+
+static int _toi_allocate_cbw_data(struct toi_cbw_state *state)
+{
+ while (state->size < toi_cbw_pool_size) {
+ int i;
+ struct toi_cbw *ptr;
+
+ ptr = (struct toi_cbw *) toi_get_zeroed_page(40, GFP_KERNEL);
+
+ if (!ptr) {
+ return -ENOMEM;
+ }
+
+ if (!state->first) {
+ state->first = state->next = state->last = ptr;
+ }
+
+ for (i = 0; i < CBWS_PER_PAGE; i++) {
+ struct toi_cbw *cbw = &ptr[i];
+
+ cbw->virt = (char *) toi_get_zeroed_page(40, GFP_KERNEL);
+ if (!cbw->virt) {
+ state->size += i;
+ printk(KERN_ERR "Out of memory allocating CBW pages.\n");
+ return -ENOMEM;
+ }
+
+ if (cbw == state->first)
+ continue;
+
+ state->last->next = cbw;
+ state->last = cbw;
+ }
+
+ state->size += CBWS_PER_PAGE;
+ }
+
+ state->enabled = 1;
+
+ return 0;
+}
+
+
+int toi_allocate_cbw_data(void)
+{
+ int i, result;
+
+ for_each_online_cpu(i) {
+ struct toi_cbw_state *state = &per_cpu(toi_cbw_states, i);
+
+ result = _toi_allocate_cbw_data(state);
+
+ if (result)
+ return result;
+ }
+
+ return 0;
+}
+
+void toi_cbw_restore(void)
+{
+ if (!toi_keeping_image)
+ return;
+
+}
+
+void toi_cbw_write(void)
+{
+ if (!toi_keeping_image)
+ return;
+
+}
+
+/**
+ * toi_cbw_test_read - Test copy before write on one page
+ *
+ * Allocate copy before write buffers, then make one page only copy-before-write
+ * and attempt to write to it. We should then be able to retrieve the original
+ * version from the cbw buffer and the modified version from the page itself.
+ */
+static int toi_cbw_test_read(const char *buffer, int count)
+{
+ unsigned long virt = toi_get_zeroed_page(40, GFP_KERNEL);
+ char *original = "Original contents";
+ char *modified = "Modified material";
+ struct page *page;
+ unsigned long pfn;
+ int i, len = 0, found = 0;
+
+ if (!virt) {
+ printk(KERN_ERR "toi_cbw_test_read: Unable to allocate a page for testing.\n");
+ return -ENOMEM;
+ }
+
+ page = virt_to_page(virt);
+ pfn = page_to_pfn(page);
+
+ memcpy((char *) virt, original, strlen(original));
+
+ if (toi_allocate_cbw_data()) {
+ printk(KERN_ERR "toi_cbw_test_read: Unable to allocate cbw data.\n");
+ toi_free_page(40, virt);
+ return -ENOMEM;
+ }
+
+ toi_reset_dirtiness_one(pfn, 0);
+
+ SetPageTOI_CBW(page);
+
+ memcpy((char *) virt, modified, strlen(modified));
+
+ if (strncmp((char *) virt, modified, strlen(modified))) {
+ len += sprintf((char *) buffer + len, "Failed to write to page after protecting it.\n");
+ }
+
+ for_each_online_cpu(i) {
+ struct toi_cbw_state *state = &per_cpu(toi_cbw_states, i);
+ struct toi_cbw *ptr = state->first, *last_ptr = ptr;
+
+ if (!found) {
+ while (ptr) {
+ if (ptr->pfn == pfn) {
+ found = 1;
+ if (strncmp(ptr->virt, original, strlen(original))) {
+ len += sprintf((char *) buffer + len, "Contents of original buffer are not original.\n");
+ } else {
+ len += sprintf((char *) buffer + len, "Test passed. Buffer changed and original contents preserved.\n");
+ }
+ break;
+ }
+
+ last_ptr = ptr;
+ ptr = ptr->next;
+ }
+ }
+
+ if (!last_ptr)
+ len += sprintf((char *) buffer + len, "All available CBW buffers on cpu %d used.\n", i);
+ }
+
+ if (!found)
+ len += sprintf((char *) buffer + len, "Copy before write buffer not found.\n");
+
+ toi_free_cbw_data();
+
+ return len;
+}
+
+/*
+ * This array contains entries that are automatically registered at
+ * boot. Modules and the console code register their own entries separately.
+ */
+static struct toi_sysfs_data sysfs_params[] = {
+ SYSFS_CUSTOM("test", SYSFS_RW, toi_cbw_test_read,
+ NULL, SYSFS_NEEDS_SM_FOR_READ, NULL),
+};
+
+static struct toi_module_ops toi_cbw_ops = {
+ .type = MISC_HIDDEN_MODULE,
+ .name = "copy_before_write debugging",
+ .directory = "cbw",
+ .module = THIS_MODULE,
+ .early = 1,
+
+ .sysfs_data = sysfs_params,
+ .num_sysfs_entries = sizeof(sysfs_params) /
+ sizeof(struct toi_sysfs_data),
+};
+
+int toi_cbw_init(void)
+{
+ int result = toi_register_module(&toi_cbw_ops);
+ return result;
+}
diff --git a/kernel/power/tuxonice_extent.c b/kernel/power/tuxonice_extent.c
new file mode 100644
index 000000000..3b558b220
--- /dev/null
+++ b/kernel/power/tuxonice_extent.c
@@ -0,0 +1,144 @@
+/*
+ * kernel/power/tuxonice_extent.c
+ *
+ * Copyright (C) 2003-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ * These functions encapsulate the manipulation of storage metadata.
+ */
+
+#include <linux/suspend.h>
+#include "tuxonice_modules.h"
+#include "tuxonice_extent.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_ui.h"
+#include "tuxonice.h"
+
+/**
+ * toi_get_extent - return a free extent
+ *
+ * May fail, returning NULL instead.
+ **/
+static struct hibernate_extent *toi_get_extent(void)
+{
+ return (struct hibernate_extent *) toi_kzalloc(2,
+ sizeof(struct hibernate_extent), TOI_ATOMIC_GFP);
+}
+
+/**
+ * toi_put_extent_chain_from - free a chain of extents starting from value 'from'
+ * @chain: Chain to free.
+ *
+ * Note that 'from' is an extent value, and may be part way through an extent.
+ * In this case, the extent should be truncated (if necessary) and following
+ * extents freed.
+ **/
+void toi_put_extent_chain_from(struct hibernate_extent_chain *chain, unsigned long from)
+{
+ struct hibernate_extent *this;
+
+ this = chain->first;
+
+ while (this) {
+ struct hibernate_extent *next = this->next;
+
+ // Delete the whole extent?
+ if (this->start >= from) {
+ chain->size -= (this->end - this->start + 1);
+ if (chain->first == this)
+ chain->first = next;
+ if (chain->last_touched == this)
+ chain->last_touched = NULL;
+ if (chain->current_extent == this)
+ chain->current_extent = NULL;
+ toi_kfree(2, this, sizeof(*this));
+ chain->num_extents--;
+ } else if (this->end >= from) {
+ // Delete part of the extent
+ chain->size -= (this->end - from + 1);
+ this->end = from - 1;
+ }
+ this = next;
+ }
+}
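+
+/*
+ * Worked example (illustrative values only): with a chain holding the
+ * extents 10-20 and 30-40, toi_put_extent_chain_from(chain, 35) truncates
+ * the second extent to 30-34 and reduces chain->size by 6, while calling
+ * it with from == 25 frees the 30-40 extent entirely and leaves 10-20
+ * untouched.
+ */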
+
+/**
+ * toi_put_extent_chain - free a whole chain of extents
+ * @chain: Chain to free.
+ **/
+void toi_put_extent_chain(struct hibernate_extent_chain *chain)
+{
+ toi_put_extent_chain_from(chain, 0);
+}
+
+/**
+ * toi_add_to_extent_chain - add an extent to an existing chain
+ * @chain: Chain to which the extent should be added
+ * @start: Start of the extent (first physical block)
+ * @end: End of the extent (last physical block)
+ *
+ * The chain information is updated if the insertion is successful.
+ **/
+int toi_add_to_extent_chain(struct hibernate_extent_chain *chain,
+ unsigned long start, unsigned long end)
+{
+ struct hibernate_extent *new_ext = NULL, *cur_ext = NULL;
+
+ toi_message(TOI_IO, TOI_VERBOSE, 0,
+ "Adding extent %lu-%lu to chain %p.\n", start, end, chain);
+
+ /* Find the right place in the chain */
+ if (chain->last_touched && chain->last_touched->start < start)
+ cur_ext = chain->last_touched;
+ else if (chain->first && chain->first->start < start)
+ cur_ext = chain->first;
+
+ if (cur_ext) {
+ while (cur_ext->next && cur_ext->next->start < start)
+ cur_ext = cur_ext->next;
+
+ if (cur_ext->end == (start - 1)) {
+ struct hibernate_extent *next_ext = cur_ext->next;
+ cur_ext->end = end;
+
+ /* Merge with the following one? */
+ if (next_ext && cur_ext->end + 1 == next_ext->start) {
+ cur_ext->end = next_ext->end;
+ cur_ext->next = next_ext->next;
+ toi_kfree(2, next_ext, sizeof(*next_ext));
+ chain->num_extents--;
+ }
+
+ chain->last_touched = cur_ext;
+ chain->size += (end - start + 1);
+
+ return 0;
+ }
+ }
+
+ new_ext = toi_get_extent();
+ if (!new_ext) {
+ printk(KERN_INFO "Error: unable to append a new extent to the "
+ "chain.\n");
+ return -ENOMEM;
+ }
+
+ chain->num_extents++;
+ chain->size += (end - start + 1);
+ new_ext->start = start;
+ new_ext->end = end;
+
+ chain->last_touched = new_ext;
+
+ if (cur_ext) {
+ new_ext->next = cur_ext->next;
+ cur_ext->next = new_ext;
+ } else {
+ if (chain->first)
+ new_ext->next = chain->first;
+ chain->first = new_ext;
+ }
+
+ return 0;
+}
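+
+/*
+ * A small usage sketch with hypothetical values (not called anywhere):
+ * adding 10-20, 30-40 and then 21-29 to an empty chain leaves a single
+ * merged extent 10-40, because 21-29 abuts the end of 10-20 and its new
+ * end touches the start of 30-40, triggering the merge path above.
+ */
+static inline void toi_extent_chain_example(void)
+{
+ struct hibernate_extent_chain chain = { };
+
+ toi_add_to_extent_chain(&chain, 10, 20);
+ toi_add_to_extent_chain(&chain, 30, 40);
+ toi_add_to_extent_chain(&chain, 21, 29);
+ /* chain.num_extents == 1, chain.first spans 10-40, chain.size == 31 */
+ toi_put_extent_chain(&chain);
+}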
diff --git a/kernel/power/tuxonice_extent.h b/kernel/power/tuxonice_extent.h
new file mode 100644
index 000000000..cf1289efc
--- /dev/null
+++ b/kernel/power/tuxonice_extent.h
@@ -0,0 +1,45 @@
+/*
+ * kernel/power/tuxonice_extent.h
+ *
+ * Copyright (C) 2003-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * It contains declarations related to extents. Extents are
+ * TuxOnIce's method of storing some of the metadata for the image.
+ * See tuxonice_extent.c for more info.
+ *
+ */
+
+#include "tuxonice_modules.h"
+
+#ifndef EXTENT_H
+#define EXTENT_H
+
+struct hibernate_extent {
+ unsigned long start, end;
+ struct hibernate_extent *next;
+};
+
+struct hibernate_extent_chain {
+ unsigned long size; /* size of the chain, i.e. sum (max-min+1) */
+ int num_extents;
+ struct hibernate_extent *first, *last_touched;
+ struct hibernate_extent *current_extent;
+ unsigned long current_offset;
+};
+
+/* Simplify iterating through all the values in an extent chain */
+#define toi_extent_for_each(extent_chain, extentpointer, value) \
+if ((extent_chain)->first) \
+ for ((extentpointer) = (extent_chain)->first, (value) = \
+ (extentpointer)->start; \
+ ((extentpointer) && ((extentpointer)->next || (value) <= \
+ (extentpointer)->end)); \
+ (((value) == (extentpointer)->end) ? \
+ ((extentpointer) = (extentpointer)->next, (value) = \
+ ((extentpointer) ? (extentpointer)->start : 0)) : \
+ (value)++))
+
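+/*
+ * A minimal usage sketch (hypothetical helper, not referenced elsewhere):
+ * the macro above visits every value covered by the chain in order, so a
+ * chain holding 10-12 and 20-21 yields 10, 11, 12, 20, 21.
+ */
+static inline unsigned long toi_extent_chain_count_values(
+ struct hibernate_extent_chain *chain)
+{
+ struct hibernate_extent *ext;
+ unsigned long value, count = 0;
+
+ toi_extent_for_each(chain, ext, value)
+ count++;
+
+ return count; /* equals chain->size for a well-formed chain */
+}
+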
+extern void toi_put_extent_chain_from(struct hibernate_extent_chain *chain, unsigned long from);
+#endif
diff --git a/kernel/power/tuxonice_file.c b/kernel/power/tuxonice_file.c
new file mode 100644
index 000000000..607246051
--- /dev/null
+++ b/kernel/power/tuxonice_file.c
@@ -0,0 +1,484 @@
+/*
+ * kernel/power/tuxonice_file.c
+ *
+ * Copyright (C) 2005-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ * This file encapsulates functions for usage of a simple file as a
+ * backing store. It is based upon the swap allocator, and shares the
+ * same basic approach. Here, though, we have nothing to do with
+ * swapspace, and only one device to worry about.
+ *
+ * The user can just
+ *
+ * echo TuxOnIce > /path/to/my_file
+ *
+ * dd if=/dev/zero bs=1M count=<file_size_desired> >> /path/to/my_file
+ *
+ * and
+ *
+ * echo /path/to/my_file > /sys/power/tuxonice/file/target
+ *
+ * then put what they find in /sys/power/tuxonice/resume
+ * as their resume= parameter in lilo.conf (and rerun lilo if using it).
+ *
+ * Having done this, they're ready to hibernate and resume.
+ *
+ * TODO:
+ * - File resizing.
+ */
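+
+/*
+ * Putting the steps above together, a complete setup for a 1 GB image
+ * file might look like this (the path and size are only examples):
+ *
+ * echo TuxOnIce > /hibernation-file
+ * dd if=/dev/zero bs=1M count=1024 >> /hibernation-file
+ * echo /hibernation-file > /sys/power/tuxonice/file/target
+ * cat /sys/power/tuxonice/resume # gives the value to pass as resume=
+ */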
+
+#include <linux/blkdev.h>
+#include <linux/mount.h>
+#include <linux/fs.h>
+#include <linux/fs_uuid.h>
+
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_bio.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_io.h"
+
+#define target_is_normal_file() (S_ISREG(target_inode->i_mode))
+
+static struct toi_module_ops toi_fileops;
+
+static struct file *target_file;
+static struct block_device *toi_file_target_bdev;
+static unsigned long pages_available, pages_allocated;
+static char toi_file_target[256];
+static struct inode *target_inode;
+static int file_target_priority;
+static int used_devt;
+static int target_claim;
+static dev_t toi_file_dev_t;
+static int sig_page_index;
+
+/* For test_toi_file_target */
+static struct toi_bdev_info *file_chain;
+
+static int has_contiguous_blocks(struct toi_bdev_info *dev_info, int page_num)
+{
+ int j;
+ sector_t last = 0;
+
+ for (j = 0; j < dev_info->blocks_per_page; j++) {
+ sector_t this = bmap(target_inode,
+ page_num * dev_info->blocks_per_page + j);
+
+ if (!this || (last && (last + 1) != this))
+ break;
+
+ last = this;
+ }
+
+ return j == dev_info->blocks_per_page;
+}
+
+static unsigned long get_usable_pages(struct toi_bdev_info *dev_info)
+{
+ unsigned long result = 0;
+ struct block_device *bdev = dev_info->bdev;
+ int i;
+
+ switch (target_inode->i_mode & S_IFMT) {
+ case S_IFSOCK:
+ case S_IFCHR:
+ case S_IFIFO: /* Socket, Char, Fifo */
+ return -1;
+ case S_IFREG: /* Regular file: current size - holes + free
+ space on part */
+ for (i = 0; i < (target_inode->i_size >> PAGE_SHIFT) ; i++) {
+ if (has_contiguous_blocks(dev_info, i))
+ result++;
+ }
+ break;
+ case S_IFBLK: /* Block device */
+ if (!bdev->bd_disk) {
+ toi_message(TOI_IO, TOI_VERBOSE, 0,
+ "bdev->bd_disk null.");
+ return 0;
+ }
+
+ result = (bdev->bd_part ?
+ bdev->bd_part->nr_sects :
+ get_capacity(bdev->bd_disk)) >> (PAGE_SHIFT - 9);
+ }
+
+
+ return result;
+}
+
+static int toi_file_register_storage(void)
+{
+ struct toi_bdev_info *devinfo;
+ int result = 0;
+ struct fs_info *fs_info;
+
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_file_register_storage.");
+ if (!strlen(toi_file_target)) {
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Register file storage: "
+ "No target filename set.");
+ return 0;
+ }
+
+ target_file = filp_open(toi_file_target, O_RDONLY|O_LARGEFILE, 0);
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "filp_open %s returned %p.",
+ toi_file_target, target_file);
+
+ if (IS_ERR(target_file) || !target_file) {
+ target_file = NULL;
+ toi_file_dev_t = name_to_dev_t(toi_file_target);
+ if (!toi_file_dev_t) {
+ struct kstat stat;
+ int error = vfs_stat(toi_file_target, &stat);
+ printk(KERN_INFO "Open file %s returned %p and "
+ "name_to_devt failed.\n",
+ toi_file_target, target_file);
+ if (error) {
+ printk(KERN_INFO "Stating the file also failed."
+ " Nothing more we can do.\n");
+ return 0;
+ } else
+ toi_file_dev_t = stat.rdev;
+ }
+
+ toi_file_target_bdev = toi_open_by_devnum(toi_file_dev_t);
+ if (IS_ERR(toi_file_target_bdev)) {
+ printk(KERN_INFO "Got a dev_num (%lx) but failed to "
+ "open it.\n",
+ (unsigned long) toi_file_dev_t);
+ toi_file_target_bdev = NULL;
+ return 0;
+ }
+ used_devt = 1;
+ target_inode = toi_file_target_bdev->bd_inode;
+ } else
+ target_inode = target_file->f_mapping->host;
+
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Succeeded in opening the target.");
+ if (S_ISLNK(target_inode->i_mode) || S_ISDIR(target_inode->i_mode) ||
+ S_ISSOCK(target_inode->i_mode) || S_ISFIFO(target_inode->i_mode)) {
+ printk(KERN_INFO "File support works with regular files,"
+ " character files and block devices.\n");
+ /* Cleanup routine will undo the above */
+ return 0;
+ }
+
+ if (!used_devt) {
+ if (S_ISBLK(target_inode->i_mode)) {
+ toi_file_target_bdev = I_BDEV(target_inode);
+ if (!blkdev_get(toi_file_target_bdev, FMODE_WRITE |
+ FMODE_READ, NULL))
+ target_claim = 1;
+ } else
+ toi_file_target_bdev = target_inode->i_sb->s_bdev;
+ if (!toi_file_target_bdev) {
+ printk(KERN_INFO "%s is not a valid file allocator "
+ "target.\n", toi_file_target);
+ return 0;
+ }
+ toi_file_dev_t = toi_file_target_bdev->bd_dev;
+ }
+
+ devinfo = toi_kzalloc(39, sizeof(struct toi_bdev_info), GFP_ATOMIC);
+ if (!devinfo) {
+ printk("Failed to allocate a toi_bdev_info struct for the file allocator.\n");
+ return -ENOMEM;
+ }
+
+ devinfo->bdev = toi_file_target_bdev;
+ devinfo->allocator = &toi_fileops;
+ devinfo->allocator_index = 0;
+
+ fs_info = fs_info_from_block_dev(toi_file_target_bdev);
+ if (fs_info && !IS_ERR(fs_info)) {
+ memcpy(devinfo->uuid, &fs_info->uuid, 16);
+ free_fs_info(fs_info);
+ } else
+ result = (int) PTR_ERR(fs_info);
+
+ /* Unlike swap code, only complain if fs_info_from_block_dev returned
+ * -ENOMEM. The 'file' might be a full partition, so might validly not
+ * have an identifiable type, UUID etc.
+ */
+ if (result)
+ printk(KERN_DEBUG "Failed to get fs_info for file device (%d).\n",
+ result);
+ devinfo->dev_t = toi_file_dev_t;
+ devinfo->prio = file_target_priority;
+ devinfo->bmap_shift = target_inode->i_blkbits - 9;
+ devinfo->blocks_per_page =
+ (1 << (PAGE_SHIFT - target_inode->i_blkbits));
+ sprintf(devinfo->name, "file %s", toi_file_target);
+ file_chain = devinfo;
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Dev_t is %lx. Prio is %d. Bmap "
+ "shift is %d. Blocks per page %d.",
+ devinfo->dev_t, devinfo->prio, devinfo->bmap_shift,
+ devinfo->blocks_per_page);
+
+ /* Keep one aside for the signature */
+ pages_available = get_usable_pages(devinfo) - 1;
+
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Registering file storage, %lu "
+ "pages.", pages_available);
+
+ toi_bio_ops.register_storage(devinfo);
+ return 0;
+}
+
+static unsigned long toi_file_storage_available(void)
+{
+ return pages_available;
+}
+
+static int toi_file_allocate_storage(struct toi_bdev_info *chain,
+ unsigned long request)
+{
+ unsigned long available = pages_available - pages_allocated;
+ unsigned long to_add = min(available, request);
+
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Pages available is %lu. Allocated "
+ "is %lu. Allocating %lu pages from file.",
+ pages_available, pages_allocated, to_add);
+ pages_allocated += to_add;
+
+ return to_add;
+}
+
+/**
+ * __populate_block_list - add an extent to the chain
+ * @min: Start of the extent (first physical block = sector)
+ * @max: End of the extent (last physical block = sector)
+ *
+ * If TOI_TEST_BIO is set, print a debug message, outputting the min and max
+ * fs block numbers.
+ **/
+static int __populate_block_list(struct toi_bdev_info *chain, int min, int max)
+{
+ if (test_action_state(TOI_TEST_BIO))
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Adding extent %d-%d.",
+ min << chain->bmap_shift,
+ ((max + 1) << chain->bmap_shift) - 1);
+
+ return toi_add_to_extent_chain(&chain->blocks, min, max);
+}
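+
+/*
+ * Worked example for the debug output above (illustrative figures): with a
+ * 4KB filesystem block size, bmap_shift is i_blkbits - 9 == 3, so an extent
+ * covering fs blocks 10 to 13 is reported as 512-byte sectors 80 to 111.
+ */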
+
+static int get_main_pool_phys_params(struct toi_bdev_info *chain)
+{
+ int i, extent_min = -1, extent_max = -1, result = 0, have_sig_page = 0;
+ unsigned long pages_mapped = 0;
+
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Getting file allocator blocks.");
+
+ if (chain->blocks.first)
+ toi_put_extent_chain(&chain->blocks);
+
+ if (!target_is_normal_file()) {
+ result = (pages_available > 0) ?
+ __populate_block_list(chain, chain->blocks_per_page,
+ (pages_allocated + 1) *
+ chain->blocks_per_page - 1) : 0;
+ return result;
+ }
+
+ /*
+ * FIXME: We are assuming the first page is contiguous. Is that
+ * assumption always right?
+ */
+
+ for (i = 0; i < (target_inode->i_size >> PAGE_SHIFT); i++) {
+ sector_t new_sector;
+
+ if (!has_contiguous_blocks(chain, i))
+ continue;
+
+ if (!have_sig_page) {
+ have_sig_page = 1;
+ sig_page_index = i;
+ continue;
+ }
+
+ pages_mapped++;
+
+ /* Ignore first page - it has the header */
+ if (pages_mapped == 1)
+ continue;
+
+ new_sector = bmap(target_inode, (i * chain->blocks_per_page));
+
+ /*
+ * I'd love to be able to fill in holes and resize
+ * files, but not yet...
+ */
+
+ if (new_sector == extent_max + 1)
+ extent_max += chain->blocks_per_page;
+ else {
+ if (extent_min > -1) {
+ result = __populate_block_list(chain,
+ extent_min, extent_max);
+ if (result)
+ return result;
+ }
+
+ extent_min = new_sector;
+ extent_max = extent_min +
+ chain->blocks_per_page - 1;
+ }
+
+ if (pages_mapped == pages_allocated)
+ break;
+ }
+
+ if (extent_min > -1) {
+ result = __populate_block_list(chain, extent_min, extent_max);
+ if (result)
+ return result;
+ }
+
+ return 0;
+}
+
+static void toi_file_free_storage(struct toi_bdev_info *chain)
+{
+ pages_allocated = 0;
+ file_chain = NULL;
+}
+
+/**
+ * toi_file_print_debug_stats - print debug info
+ * @buffer: Buffer to data to populate
+ * @size: Size of the buffer
+ **/
+static int toi_file_print_debug_stats(char *buffer, int size)
+{
+ int len = scnprintf(buffer, size, "- File Allocator active.\n");
+
+ len += scnprintf(buffer+len, size-len, " Storage available for "
+ "image: %lu pages.\n", pages_available);
+
+ return len;
+}
+
+static void toi_file_cleanup(int finishing_cycle)
+{
+ if (toi_file_target_bdev) {
+ if (target_claim) {
+ blkdev_put(toi_file_target_bdev, FMODE_WRITE | FMODE_READ);
+ target_claim = 0;
+ }
+
+ if (used_devt) {
+ blkdev_put(toi_file_target_bdev,
+ FMODE_READ | FMODE_NDELAY);
+ used_devt = 0;
+ }
+ toi_file_target_bdev = NULL;
+ target_inode = NULL;
+ }
+
+ if (target_file) {
+ filp_close(target_file, NULL);
+ target_file = NULL;
+ }
+
+ pages_available = 0;
+}
+
+/**
+ * test_toi_file_target - sysfs callback for /sys/power/tuxonice/file/target
+ *
+ * Test whether the target file is valid for hibernating.
+ **/
+static void test_toi_file_target(void)
+{
+ int result = toi_file_register_storage();
+ sector_t sector;
+ char buf[50];
+ struct fs_info *fs_info;
+
+ if (result || !file_chain)
+ return;
+
+ /* This doesn't mean we're in business. Is any storage available? */
+ if (!pages_available)
+ goto out;
+
+ toi_file_allocate_storage(file_chain, 1);
+ result = get_main_pool_phys_params(file_chain);
+ if (result)
+ goto out;
+
+
+ sector = bmap(target_inode, sig_page_index *
+ file_chain->blocks_per_page) << file_chain->bmap_shift;
+
+ /* Use the uuid, or the dev_t if that fails */
+ fs_info = fs_info_from_block_dev(toi_file_target_bdev);
+ if (!fs_info || IS_ERR(fs_info)) {
+ bdevname(toi_file_target_bdev, buf);
+ sprintf(resume_file, "/dev/%s:%llu", buf,
+ (unsigned long long) sector);
+ } else {
+ int i;
+ hex_dump_to_buffer(fs_info->uuid, 16, 32, 1, buf, 50, 0);
+
+ /* Remove the spaces */
+ for (i = 1; i < 16; i++) {
+ buf[2 * i] = buf[3 * i];
+ buf[2 * i + 1] = buf[3 * i + 1];
+ }
+ buf[32] = 0;
+ sprintf(resume_file, "UUID=%s:0x%llx", buf,
+ (unsigned long long) sector);
+ free_fs_info(fs_info);
+ }
+
+ toi_attempt_to_parse_resume_device(0);
+out:
+ toi_file_free_storage(file_chain);
+ toi_bio_ops.free_storage();
+}
+
+static struct toi_sysfs_data sysfs_params[] = {
+ SYSFS_STRING("target", SYSFS_RW, toi_file_target, 256,
+ SYSFS_NEEDS_SM_FOR_WRITE, test_toi_file_target),
+ SYSFS_INT("enabled", SYSFS_RW, &toi_fileops.enabled, 0, 1, 0, NULL),
+ SYSFS_INT("priority", SYSFS_RW, &file_target_priority, -4095,
+ 4096, 0, NULL),
+};
+
+static struct toi_bio_allocator_ops toi_bio_fileops = {
+ .register_storage = toi_file_register_storage,
+ .storage_available = toi_file_storage_available,
+ .allocate_storage = toi_file_allocate_storage,
+ .bmap = get_main_pool_phys_params,
+ .free_storage = toi_file_free_storage,
+};
+
+static struct toi_module_ops toi_fileops = {
+ .type = BIO_ALLOCATOR_MODULE,
+ .name = "file storage",
+ .directory = "file",
+ .module = THIS_MODULE,
+ .print_debug_info = toi_file_print_debug_stats,
+ .cleanup = toi_file_cleanup,
+ .bio_allocator_ops = &toi_bio_fileops,
+
+ .sysfs_data = sysfs_params,
+ .num_sysfs_entries = sizeof(sysfs_params) /
+ sizeof(struct toi_sysfs_data),
+};
+
+/* ---- Registration ---- */
+static __init int toi_file_load(void)
+{
+ return toi_register_module(&toi_fileops);
+}
+
+late_initcall(toi_file_load);
diff --git a/kernel/power/tuxonice_highlevel.c b/kernel/power/tuxonice_highlevel.c
new file mode 100644
index 000000000..bdcd832f3
--- /dev/null
+++ b/kernel/power/tuxonice_highlevel.c
@@ -0,0 +1,1413 @@
+/*
+ * kernel/power/tuxonice_highlevel.c
+ */
+/** \mainpage TuxOnIce.
+ *
+ * TuxOnIce provides support for saving and restoring an image of
+ * system memory to an arbitrary storage device, either on the local computer,
+ * or across some network. The support is entirely OS based, so TuxOnIce
+ * works without requiring BIOS, APM or ACPI support. The vast majority of the
+ * code is also architecture independent, so it should be very easy to port
+ * the code to new architectures. TuxOnIce includes support for SMP, 4G HighMem
+ * and preemption. Initramfses and initrds are also supported.
+ *
+ * TuxOnIce uses a modular design, in which the method of storing the image is
+ * completely abstracted from the core code, as are transformations on the data
+ * such as compression and/or encryption (multiple 'modules' can be used to
+ * provide arbitrary combinations of functionality). The user interface is also
+ * modular, so that arbitrarily simple or complex interfaces can be used to
+ * provide anything from debugging information through to eye candy.
+ *
+ * \section Copyright
+ *
+ * TuxOnIce is released under the GPLv2.
+ *
+ * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu><BR>
+ * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz><BR>
+ * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr><BR>
+ * Copyright (C) 2002-2015 Nigel Cunningham (nigel at nigelcunningham com au)<BR>
+ *
+ * \section Credits
+ *
+ * Nigel would like to thank the following people for their work:
+ *
+ * Bernard Blackham <bernard@blackham.com.au><BR>
+ * Web page & Wiki administration, some coding. A person without whom
+ * TuxOnIce would not be where it is.
+ *
+ * Michael Frank <mhf@linuxmail.org><BR>
+ * Extensive testing and help with improving stability. I was constantly
+ * amazed by the quality and quantity of Michael's help.
+ *
+ * Pavel Machek <pavel@ucw.cz><BR>
+ * Modifications, defectiveness pointing, being with Gabor at the very
+ * beginning, suspend to swap space, stop all tasks. Port to 2.4.18-ac and
+ * 2.5.17. Even though Pavel and I disagree on the direction suspend to
+ * disk should take, I appreciate the valuable work he did in helping Gabor
+ * get the concept working.
+ *
+ * ..and of course the myriads of TuxOnIce users who have helped diagnose
+ * and fix bugs, made suggestions on how to improve the code, proofread
+ * documentation, and donated time and money.
+ *
+ * Thanks also to corporate sponsors:
+ *
+ * <B>Redhat.</B> Sometime employer from May 2006 (my fault, not Redhat's!).
+ *
+ * <B>Cyclades.com.</B> Nigel's employers from Dec 2004 until May 2006, who
+ * allowed him to work on TuxOnIce and PM related issues on company time.
+ *
+ * <B>LinuxFund.org.</B> Sponsored Nigel's work on TuxOnIce for four months Oct
+ * 2003 to Jan 2004.
+ *
+ * <B>LAC Linux.</B> Donated P4 hardware that enabled development and ongoing
+ * maintenance of SMP and Highmem support.
+ *
+ * <B>OSDL.</B> Provided access to various hardware configurations and made
+ * occasional small donations to the project.
+ */
+
+#include <linux/suspend.h>
+#include <linux/module.h>
+#include <linux/freezer.h>
+#include <generated/utsrelease.h>
+#include <linux/cpu.h>
+#include <linux/console.h>
+#include <linux/writeback.h>
+#include <linux/uaccess.h> /* for get/set_fs & KERNEL_DS on i386 */
+#include <linux/bio.h>
+#include <linux/kgdb.h>
+
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice_io.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_power_off.h"
+#include "tuxonice_storage.h"
+#include "tuxonice_checksum.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_atomic_copy.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_cluster.h"
+
+/*! Pageset metadata. */
+struct pagedir pagedir2 = {2};
+
+static mm_segment_t oldfs;
+static DEFINE_MUTEX(tuxonice_in_use);
+static int block_dump_save;
+
+int toi_trace_index;
+
+/* Binary signature if an image is present */
+char tuxonice_signature[9] = "\xed\xc3\x02\xe9\x98\x56\xe5\x0c";
+
+unsigned long boot_kernel_data_buffer;
+
+static char *result_strings[] = {
+ "Hibernation was aborted",
+ "The user requested that we cancel the hibernation",
+ "No storage was available",
+ "Insufficient storage was available",
+ "Freezing filesystems and/or tasks failed",
+ "A pre-existing image was used",
+ "We would free memory, but image size limit doesn't allow this",
+ "Unable to free enough memory to hibernate",
+ "Unable to obtain the Power Management Semaphore",
+ "A device suspend/resume returned an error",
+ "A system device suspend/resume returned an error",
+ "The extra pages allowance is too small",
+ "We were unable to successfully prepare an image",
+ "TuxOnIce module initialisation failed",
+ "TuxOnIce module cleanup failed",
+ "I/O errors were encountered",
+ "Ran out of memory",
+ "An error was encountered while reading the image",
+ "Platform preparation failed",
+ "CPU Hotplugging failed",
+ "Architecture specific preparation failed",
+ "Pages needed resaving, but we were told to abort if this happens",
+ "We can't hibernate at the moment (invalid resume= or filewriter "
+ "target?)",
+ "A hibernation preparation notifier chain member cancelled the "
+ "hibernation",
+ "Pre-snapshot preparation failed",
+ "Pre-restore preparation failed",
+ "Failed to disable usermode helpers",
+ "Can't resume from alternate image",
+ "Header reservation too small",
+ "Device Power Management Preparation failed",
+};
+
+/**
+ * toi_finish_anything - cleanup after doing anything
+ * @hibernate_or_resume: Whether finishing a cycle or attempt at
+ * resuming.
+ *
+ * This is our basic clean-up routine, matching start_anything below. We
+ * call cleanup routines, drop module references and restore process fs and
+ * cpus allowed masks, together with the global block_dump variable's value.
+ **/
+void toi_finish_anything(int hibernate_or_resume)
+{
+ toi_running = 0;
+ toi_cleanup_modules(hibernate_or_resume);
+ toi_put_modules();
+ if (hibernate_or_resume) {
+ block_dump = block_dump_save;
+ set_cpus_allowed_ptr(current, cpu_all_mask);
+ toi_alloc_print_debug_stats();
+ atomic_inc(&snapshot_device_available);
+ unlock_system_sleep();
+ }
+
+ set_fs(oldfs);
+ mutex_unlock(&tuxonice_in_use);
+}
+
+/**
+ * toi_start_anything - basic initialisation for TuxOnIce
+ * @hibernate_or_resume: Whether starting a cycle or attempt at resuming.
+ *
+ * Our basic initialisation routine. Take references on modules, use the
+ * kernel segment, recheck resume= if no active allocator is set, initialise
+ * modules, save and reset block_dump and ensure we're running on CPU0.
+ **/
+int toi_start_anything(int hibernate_or_resume)
+{
+ mutex_lock(&tuxonice_in_use);
+
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
+
+ toi_trace_index = 0;
+
+ if (hibernate_or_resume) {
+ lock_system_sleep();
+
+ if (!atomic_add_unless(&snapshot_device_available, -1, 0))
+ goto snapshotdevice_unavailable;
+ }
+
+ if (hibernate_or_resume == SYSFS_HIBERNATE)
+ toi_print_modules();
+
+ if (toi_get_modules()) {
+ printk(KERN_INFO "TuxOnIce: Get modules failed!\n");
+ goto prehibernate_err;
+ }
+
+ if (hibernate_or_resume) {
+ block_dump_save = block_dump;
+ block_dump = 0;
+ set_cpus_allowed_ptr(current,
+ cpumask_of(cpumask_first(cpu_online_mask)));
+ }
+
+ if (toi_initialise_modules_early(hibernate_or_resume))
+ goto early_init_err;
+
+ if (!toiActiveAllocator)
+ toi_attempt_to_parse_resume_device(!hibernate_or_resume);
+
+ if (!toi_initialise_modules_late(hibernate_or_resume)) {
+ toi_running = 1; /* For the swsusp code we use :< */
+ return 0;
+ }
+
+ toi_cleanup_modules(hibernate_or_resume);
+early_init_err:
+ if (hibernate_or_resume) {
+ block_dump_save = block_dump;
+ set_cpus_allowed_ptr(current, cpu_all_mask);
+ }
+ toi_put_modules();
+prehibernate_err:
+ if (hibernate_or_resume)
+ atomic_inc(&snapshot_device_available);
+snapshotdevice_unavailable:
+ if (hibernate_or_resume)
+ mutex_unlock(&pm_mutex);
+ set_fs(oldfs);
+ mutex_unlock(&tuxonice_in_use);
+ return -EBUSY;
+}
+
+/*
+ * Nosave page tracking.
+ *
+ * Here rather than in prepare_image because we want to do it once only at the
+ * start of a cycle.
+ */
+
+/**
+ * mark_nosave_pages - set up our Nosave bitmap
+ *
+ * Build a bitmap of Nosave pages from the list. The bitmap allows faster
+ * use when preparing the image.
+ **/
+static void mark_nosave_pages(void)
+{
+ struct nosave_region *region;
+
+ list_for_each_entry(region, &nosave_regions, list) {
+ unsigned long pfn;
+
+ for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
+ if (pfn_valid(pfn)) {
+ SetPageNosave(pfn_to_page(pfn));
+ }
+ }
+}
+
+/**
+ * allocate_bitmaps - allocate bitmaps used to record page states
+ *
+ * Allocate the bitmaps we use to record the various TuxOnIce related
+ * page states.
+ **/
+static int allocate_bitmaps(void)
+{
+ if (toi_alloc_bitmap(&pageset1_map) ||
+ toi_alloc_bitmap(&pageset1_copy_map) ||
+ toi_alloc_bitmap(&pageset2_map) ||
+ toi_alloc_bitmap(&io_map) ||
+ toi_alloc_bitmap(&nosave_map) ||
+ toi_alloc_bitmap(&free_map) ||
+ toi_alloc_bitmap(&compare_map) ||
+ toi_alloc_bitmap(&page_resave_map))
+ return 1;
+
+ return 0;
+}
+
+/**
+ * free_bitmaps - free the bitmaps used to record page states
+ *
+ * Free the bitmaps allocated above. It is not an error to call
+ * memory_bm_free on a bitmap that isn't currently allocated.
+ **/
+static void free_bitmaps(void)
+{
+ toi_free_bitmap(&pageset1_map);
+ toi_free_bitmap(&pageset1_copy_map);
+ toi_free_bitmap(&pageset2_map);
+ toi_free_bitmap(&io_map);
+ toi_free_bitmap(&nosave_map);
+ toi_free_bitmap(&free_map);
+ toi_free_bitmap(&compare_map);
+ toi_free_bitmap(&page_resave_map);
+}
+
+/**
+ * io_MB_per_second - return the number of MB/s read or written
+ * @write: Whether to return the speed at which we wrote.
+ *
+ * Calculate the number of megabytes per second that were read or written.
+ **/
+static int io_MB_per_second(int write)
+{
+ return (toi_bkd.toi_io_time[write][1]) ?
+ MB((unsigned long) toi_bkd.toi_io_time[write][0]) * HZ /
+ toi_bkd.toi_io_time[write][1] : 0;
+}
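+
+/*
+ * Worked example (illustrative figures, assuming toi_io_time[write][0] is a
+ * page count and MB() converts pages to megabytes): with 4KB pages, HZ=250,
+ * 131072 pages (512MB) written in 2500 jiffies (10 seconds), the result is
+ * 512 * 250 / 2500 = 51 MB/s.
+ */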
+
+#define SNPRINTF(a...) do { len += scnprintf(((char *) buffer) + len, \
+ count - len - 1, ## a); } while (0)
+
+/**
+ * get_debug_info - fill a buffer with debugging information
+ * @buffer: The buffer to be filled.
+ * @count: The size of the buffer, in bytes.
+ *
+ * Fill a (usually PAGE_SIZEd) buffer with the debugging info that we will
+ * either printk or return via sysfs.
+ **/
+static int get_toi_debug_info(const char *buffer, int count)
+{
+ int len = 0, i, first_result = 1;
+
+ SNPRINTF("TuxOnIce debugging info:\n");
+ SNPRINTF("- TuxOnIce core : " TOI_CORE_VERSION "\n");
+ SNPRINTF("- Kernel Version : " UTS_RELEASE "\n");
+ SNPRINTF("- Compiler vers. : %d.%d\n", __GNUC__, __GNUC_MINOR__);
+ SNPRINTF("- Attempt number : %d\n", nr_hibernates);
+ SNPRINTF("- Parameters : %ld %ld %ld %d %ld %ld\n",
+ toi_result,
+ toi_bkd.toi_action,
+ toi_bkd.toi_debug_state,
+ toi_bkd.toi_default_console_level,
+ image_size_limit,
+ toi_poweroff_method);
+ SNPRINTF("- Overall expected compression percentage: %d.\n",
+ 100 - toi_expected_compression_ratio());
+ len += toi_print_module_debug_info(((char *) buffer) + len,
+ count - len - 1);
+ if (toi_bkd.toi_io_time[0][1]) {
+ if ((io_MB_per_second(0) < 5) || (io_MB_per_second(1) < 5)) {
+ SNPRINTF("- I/O speed: Write %ld KB/s",
+ (KB((unsigned long) toi_bkd.toi_io_time[0][0]) * HZ /
+ toi_bkd.toi_io_time[0][1]));
+ if (toi_bkd.toi_io_time[1][1])
+ SNPRINTF(", Read %ld KB/s",
+ (KB((unsigned long)
+ toi_bkd.toi_io_time[1][0]) * HZ /
+ toi_bkd.toi_io_time[1][1]));
+ } else {
+ SNPRINTF("- I/O speed: Write %ld MB/s",
+ (MB((unsigned long) toi_bkd.toi_io_time[0][0]) * HZ /
+ toi_bkd.toi_io_time[0][1]));
+ if (toi_bkd.toi_io_time[1][1])
+ SNPRINTF(", Read %ld MB/s",
+ (MB((unsigned long)
+ toi_bkd.toi_io_time[1][0]) * HZ /
+ toi_bkd.toi_io_time[1][1]));
+ }
+ SNPRINTF(".\n");
+ } else
+ SNPRINTF("- No I/O speed stats available.\n");
+ SNPRINTF("- Extra pages : %lu used/%lu.\n",
+ extra_pd1_pages_used, extra_pd1_pages_allowance);
+
+ for (i = 0; i < TOI_NUM_RESULT_STATES; i++)
+ if (test_result_state(i)) {
+ SNPRINTF("%s: %s.\n", first_result ?
+ "- Result " :
+ " ",
+ result_strings[i]);
+ first_result = 0;
+ }
+ if (first_result)
+ SNPRINTF("- Result : %s.\n", nr_hibernates ?
+ "Succeeded" :
+ "No hibernation attempts so far");
+ return len;
+}
+
+#ifdef CONFIG_TOI_INCREMENTAL
+/**
+ * get_toi_page_state - fill a buffer with page state information
+ * @buffer: The buffer to be filled.
+ * @count: The size of the buffer, in bytes.
+ *
+ * Fill a (usually PAGE_SIZEd) buffer with the debugging info that we will
+ * either printk or return via sysfs.
+ **/
+static int get_toi_page_state(const char *buffer, int count)
+{
+ int free = 0, untracked = 0, dirty = 0, ro = 0, invalid = 0, other = 0, total = 0;
+ int len = 0;
+ struct zone *zone;
+ int allocated_bitmaps = 0;
+
+ set_cpus_allowed_ptr(current,
+ cpumask_of(cpumask_first(cpu_online_mask)));
+
+ if (!free_map) {
+ BUG_ON(toi_alloc_bitmap(&free_map));
+ allocated_bitmaps = 1;
+ }
+
+ toi_generate_free_page_map();
+
+ for_each_populated_zone(zone) {
+ unsigned long loop;
+
+ total += zone->spanned_pages;
+
+ for (loop = 0; loop < zone->spanned_pages; loop++) {
+ unsigned long pfn = zone->zone_start_pfn + loop;
+ struct page *page;
+ int chunk_size;
+
+ if (!pfn_valid(pfn)) {
+ continue;
+ }
+
+ chunk_size = toi_size_of_free_region(zone, pfn);
+ if (chunk_size) {
+ /*
+ * If the page gets allocated, it will need
+ * saving in an image.
+ * Don't bother with explicitly removing any
+ * RO protection applied below.
+ * We'll SetPageTOI_Dirty(page) if/when it
+ * gets allocated.
+ */
+ free += chunk_size;
+ loop += chunk_size - 1;
+ continue;
+ }
+
+ page = pfn_to_page(pfn);
+
+ if (PageTOI_Untracked(page)) {
+ untracked++;
+ } else if (PageTOI_RO(page)) {
+ ro++;
+ } else if (PageTOI_Dirty(page)) {
+ dirty++;
+ } else {
+ printk("Page %ld state 'other'.\n", pfn);
+ other++;
+ }
+ }
+ }
+
+ if (allocated_bitmaps) {
+ toi_free_bitmap(&free_map);
+ }
+
+ set_cpus_allowed_ptr(current, cpu_all_mask);
+
+ SNPRINTF("TuxOnIce page breakdown:\n");
+ SNPRINTF("- Free : %d\n", free);
+ SNPRINTF("- Untracked : %d\n", untracked);
+ SNPRINTF("- Read only : %d\n", ro);
+ SNPRINTF("- Dirty : %d\n", dirty);
+ SNPRINTF("- Other : %d\n", other);
+ SNPRINTF("- Invalid : %d\n", invalid);
+ SNPRINTF("- Total : %d\n", total);
+ return len;
+}
+#endif
+
+/**
+ * do_cleanup - cleanup after attempting to hibernate or resume
+ * @get_debug_info: Whether to allocate and return debugging info.
+ *
+ * Cleanup after attempting to hibernate or resume, possibly getting
+ * debugging info as we do so.
+ **/
+static void do_cleanup(int get_debug_info, int restarting)
+{
+ int i = 0;
+ char *buffer = NULL;
+
+ trap_non_toi_io = 0;
+
+ if (get_debug_info)
+ toi_prepare_status(DONT_CLEAR_BAR, "Cleaning up...");
+
+ free_checksum_pages();
+
+ toi_cbw_restore();
+ toi_free_cbw_data();
+
+ if (get_debug_info)
+ buffer = (char *) toi_get_zeroed_page(20, TOI_ATOMIC_GFP);
+
+ if (buffer)
+ i = get_toi_debug_info(buffer, PAGE_SIZE);
+
+ toi_free_extra_pagedir_memory();
+
+ pagedir1.size = 0;
+ pagedir2.size = 0;
+ set_highmem_size(pagedir1, 0);
+ set_highmem_size(pagedir2, 0);
+
+ if (boot_kernel_data_buffer) {
+ if (!test_toi_state(TOI_BOOT_KERNEL))
+ toi_free_page(37, boot_kernel_data_buffer);
+ boot_kernel_data_buffer = 0;
+ }
+
+ if (test_toi_state(TOI_DEVICE_HOTPLUG_LOCKED)) {
+ unlock_device_hotplug();
+ clear_toi_state(TOI_DEVICE_HOTPLUG_LOCKED);
+ }
+
+ clear_toi_state(TOI_BOOT_KERNEL);
+ if (current->flags & PF_SUSPEND_TASK)
+ thaw_processes();
+
+ if (!restarting)
+ toi_stop_other_threads();
+
+ if (toi_keeping_image &&
+ !test_result_state(TOI_ABORTED)) {
+ toi_message(TOI_ANY_SECTION, TOI_LOW, 1,
+ "TuxOnIce: Not invalidating the image due "
+ "to Keep Image or Incremental Image being enabled.");
+ set_result_state(TOI_KEPT_IMAGE);
+
+ /*
+ * For an incremental image, free unused storage so
+ * swap (if any) can be used for normal system operation,
+ * if so desired.
+ */
+
+ toiActiveAllocator->free_unused_storage();
+ } else
+ if (toiActiveAllocator)
+ toiActiveAllocator->remove_image();
+
+ free_bitmaps();
+ usermodehelper_enable();
+
+ if (test_toi_state(TOI_NOTIFIERS_PREPARE)) {
+ pm_notifier_call_chain(PM_POST_HIBERNATION);
+ clear_toi_state(TOI_NOTIFIERS_PREPARE);
+ }
+
+ if (buffer && i) {
+ /* Printk can only handle 1023 bytes, including
+ * its level mangling. */
+ for (i = 0; i < 3; i++)
+ printk(KERN_ERR "%s", buffer + (1023 * i));
+ toi_free_page(20, (unsigned long) buffer);
+ }
+
+ if (!restarting)
+ toi_cleanup_console();
+
+ free_attention_list();
+
+ if (!restarting)
+ toi_deactivate_storage(0);
+
+ clear_toi_state(TOI_IGNORE_LOGLEVEL);
+ clear_toi_state(TOI_TRYING_TO_RESUME);
+ clear_toi_state(TOI_NOW_RESUMING);
+}
+
+/**
+ * check_still_keeping_image - we kept an image; check whether to reuse it.
+ *
+ * We enter this routine when we have kept an image. If the user has said they
+ * want to still keep it, all we need to do is powerdown. If powering down
+ * means hibernating to ram and the power doesn't run out, we'll return 1.
+ * If we do power off properly or the battery runs out, we'll resume via the
+ * normal paths.
+ *
+ * If the user has said they want to remove the previously kept image, we
+ * remove it, and return 0. We'll then store a new image.
+ **/
+static int check_still_keeping_image(void)
+{
+ if (toi_keeping_image) {
+ if (!test_action_state(TOI_INCREMENTAL_IMAGE)) {
+ printk(KERN_INFO "Image already stored: powering down "
+ "immediately.");
+ do_toi_step(STEP_HIBERNATE_POWERDOWN);
+ return 1;
+ }
+ /**
+ * Incremental image - need to write new part.
+ * We detect that we're writing an incremental image by looking
+ * at test_result_state(TOI_KEPT_IMAGE)
+ **/
+ return 0;
+ }
+
+ printk(KERN_INFO "Invalidating previous image.\n");
+ toiActiveAllocator->remove_image();
+
+ return 0;
+}
+
+/**
+ * toi_init - prepare to hibernate to disk
+ *
+ * Initialise variables & data structures, in preparation for
+ * hibernating to disk.
+ **/
+static int toi_init(int restarting)
+{
+ int result, i, j;
+
+ toi_result = 0;
+
+ printk(KERN_INFO "Initiating a hibernation cycle.\n");
+
+ nr_hibernates++;
+
+ for (i = 0; i < 2; i++)
+ for (j = 0; j < 2; j++)
+ toi_bkd.toi_io_time[i][j] = 0;
+
+ if (!test_toi_state(TOI_CAN_HIBERNATE) ||
+ allocate_bitmaps())
+ return 1;
+
+ mark_nosave_pages();
+
+ if (!restarting)
+ toi_prepare_console();
+
+ result = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
+ if (result) {
+ set_result_state(TOI_NOTIFIERS_PREPARE_FAILED);
+ return 1;
+ }
+ set_toi_state(TOI_NOTIFIERS_PREPARE);
+
+ if (!restarting) {
+ printk(KERN_ERR "Starting other threads.");
+ toi_start_other_threads();
+ }
+
+ result = usermodehelper_disable();
+ if (result) {
+ printk(KERN_ERR "TuxOnIce: Failed to disable usermode "
+ "helpers\n");
+ set_result_state(TOI_USERMODE_HELPERS_ERR);
+ return 1;
+ }
+
+ boot_kernel_data_buffer = toi_get_zeroed_page(37, TOI_ATOMIC_GFP);
+ if (!boot_kernel_data_buffer) {
+ printk(KERN_ERR "TuxOnIce: Failed to allocate "
+ "boot_kernel_data_buffer.\n");
+ set_result_state(TOI_OUT_OF_MEMORY);
+ return 1;
+ }
+
+ toi_allocate_cbw_data();
+
+ return 0;
+}
+
+/**
+ * can_hibernate - perform basic 'Can we hibernate?' tests
+ *
+ * Perform basic tests that must pass if we're going to be able to hibernate:
+ * Can we get the pm_mutex? Is resume= valid (we need to know where to write
+ * the image header)?
+ **/
+static int can_hibernate(void)
+{
+ if (!test_toi_state(TOI_CAN_HIBERNATE))
+ toi_attempt_to_parse_resume_device(0);
+
+ if (!test_toi_state(TOI_CAN_HIBERNATE)) {
+ printk(KERN_INFO "TuxOnIce: Hibernation is disabled.\n"
+ "This may be because you haven't put something along "
+ "the lines of\n\nresume=swap:/dev/hda1\n\n"
+ "in lilo.conf or equivalent. (Where /dev/hda1 is your "
+ "swap partition).\n");
+ set_abort_result(TOI_CANT_SUSPEND);
+ return 0;
+ }
+
+ if (strlen(alt_resume_param)) {
+ attempt_to_parse_alt_resume_param();
+
+ if (!strlen(alt_resume_param)) {
+ printk(KERN_INFO "Alternate resume parameter now "
+ "invalid. Aborting.\n");
+ set_abort_result(TOI_CANT_USE_ALT_RESUME);
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+/**
+ * do_post_image_write - having written an image, figure out what to do next
+ *
+ * After writing an image, we might load an alternate image or power down.
+ * Powering down might involve hibernating to ram, in which case we also
+ * need to handle reloading pageset2.
+ **/
+static int do_post_image_write(void)
+{
+ /* If switching images fails, do normal powerdown */
+ if (alt_resume_param[0])
+ do_toi_step(STEP_RESUME_ALT_IMAGE);
+
+ toi_power_down();
+
+ barrier();
+ mb();
+ return 0;
+}
+
+/**
+ * __save_image - do the hard work of saving the image
+ *
+ * High level routine for getting the image saved. The key assumptions made
+ * are that processes have been frozen and sufficient memory is available.
+ *
+ * We also exit through here at resume time, coming back from toi_hibernate
+ * after the atomic restore. This is the reason for the toi_in_hibernate
+ * test.
+ **/
+static int __save_image(void)
+{
+ int temp_result, did_copy = 0;
+
+ toi_prepare_status(DONT_CLEAR_BAR, "Starting to save the image..");
+
+ toi_message(TOI_ANY_SECTION, TOI_LOW, 1,
+ " - Final values: %d and %d.",
+ pagedir1.size, pagedir2.size);
+
+ toi_cond_pause(1, "About to write pagedir2.");
+
+ temp_result = write_pageset(&pagedir2);
+
+ if (temp_result == -1 || test_result_state(TOI_ABORTED))
+ return 1;
+
+ toi_cond_pause(1, "About to copy pageset 1.");
+
+ if (test_result_state(TOI_ABORTED))
+ return 1;
+
+ toi_deactivate_storage(1);
+
+ toi_prepare_status(DONT_CLEAR_BAR, "Doing atomic copy/restore.");
+
+ toi_in_hibernate = 1;
+
+ if (toi_go_atomic(PMSG_FREEZE, 1))
+ goto Failed;
+
+ temp_result = toi_hibernate();
+
+#ifdef CONFIG_KGDB
+ if (test_action_state(TOI_POST_RESUME_BREAKPOINT))
+ kgdb_breakpoint();
+#endif
+
+ if (!temp_result)
+ did_copy = 1;
+
+ /* We return here at resume time too! */
+ toi_end_atomic(ATOMIC_ALL_STEPS, toi_in_hibernate, temp_result);
+
+Failed:
+ if (toi_activate_storage(1))
+ panic("Failed to reactivate our storage.");
+
+ /* Resume time? */
+ if (!toi_in_hibernate) {
+ copyback_post();
+ return 0;
+ }
+
+ /* Nope. Hibernating. So, see if we can save the image... */
+
+ if (temp_result || test_result_state(TOI_ABORTED)) {
+ if (did_copy)
+ goto abort_reloading_pagedir_two;
+ else
+ return 1;
+ }
+
+ toi_update_status(pagedir2.size, pagedir1.size + pagedir2.size,
+ NULL);
+
+ if (test_result_state(TOI_ABORTED))
+ goto abort_reloading_pagedir_two;
+
+ toi_cond_pause(1, "About to write pageset1.");
+
+ toi_message(TOI_ANY_SECTION, TOI_LOW, 1, "-- Writing pageset1");
+
+ temp_result = write_pageset(&pagedir1);
+
+ /* We didn't overwrite any memory, so no reread needs to be done. */
+ if (test_action_state(TOI_TEST_FILTER_SPEED) ||
+ test_action_state(TOI_TEST_BIO))
+ return 1;
+
+ if (temp_result == 1 || test_result_state(TOI_ABORTED))
+ goto abort_reloading_pagedir_two;
+
+ toi_cond_pause(1, "About to write header.");
+
+ if (test_result_state(TOI_ABORTED))
+ goto abort_reloading_pagedir_two;
+
+ temp_result = write_image_header();
+
+ if (!temp_result && !test_result_state(TOI_ABORTED))
+ return 0;
+
+abort_reloading_pagedir_two:
+ temp_result = read_pageset2(1);
+
+ /* If that failed, we're sunk. Panic! */
+ if (temp_result)
+ panic("Attempt to reload pagedir 2 while aborting "
+ "a hibernate failed.");
+
+ return 1;
+}
+
+static void map_ps2_pages(int enable)
+{
+ unsigned long pfn = 0;
+
+ memory_bm_position_reset(pageset2_map);
+ pfn = memory_bm_next_pfn(pageset2_map, 0);
+
+ while (pfn != BM_END_OF_MAP) {
+ struct page *page = pfn_to_page(pfn);
+ kernel_map_pages(page, 1, enable);
+ pfn = memory_bm_next_pfn(pageset2_map, 0);
+ }
+}
+
+/**
+ * do_save_image - save the image and handle the result
+ *
+ * Save the prepared image. If we fail or we're in the path returning
+ * from the atomic restore, cleanup.
+ **/
+static int do_save_image(void)
+{
+ int result;
+ map_ps2_pages(0);
+ result = __save_image();
+ map_ps2_pages(1);
+ return result;
+}
+
+/**
+ * do_prepare_image - try to prepare an image
+ *
+ * Seek to initialise and prepare an image to be saved. On failure,
+ * cleanup.
+ **/
+static int do_prepare_image(void)
+{
+ int restarting = test_result_state(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL);
+
+ if (!restarting && toi_activate_storage(0))
+ return 1;
+
+ /*
+ * If we have a kept image, are still keeping it and powering down means
+ * hibernating to RAM (the non-incremental image case), we will return 1
+ * after hibernating and resuming (provided the power doesn't run out).
+ * In that case, we skip directly to cleaning up and exiting.
+ */
+
+ if (!can_hibernate() ||
+ (test_result_state(TOI_KEPT_IMAGE) &&
+ check_still_keeping_image()))
+ return 1;
+
+ if (toi_init(restarting) || toi_prepare_image() ||
+ test_result_state(TOI_ABORTED))
+ return 1;
+
+ trap_non_toi_io = 1;
+
+ return 0;
+}
+
+/**
+ * do_check_can_resume - find out whether an image has been stored
+ *
+ * Read whether an image exists. We use the same routine as the
+ * image_exists sysfs entry, and just look to see whether the
+ * first character in the resulting buffer is a '1'.
+ **/
+int do_check_can_resume(void)
+{
+ int result = -1;
+
+ if (toi_activate_storage(0))
+ return -1;
+
+ if (!test_toi_state(TOI_RESUME_DEVICE_OK))
+ toi_attempt_to_parse_resume_device(1);
+
+ if (toiActiveAllocator)
+ result = toiActiveAllocator->image_exists(1);
+
+ toi_deactivate_storage(0);
+ return result;
+}
+
+/**
+ * do_load_atomic_copy - load the first part of an image, if it exists
+ *
+ * Check whether we have an image. If one exists, do sanity checking
+ * (possibly invalidating the image or even rebooting if the user
+ * requests that) before loading it into memory in preparation for the
+ * atomic restore.
+ *
+ * If and only if we have an image loaded and ready to restore, we return 1.
+ **/
+static int do_load_atomic_copy(void)
+{
+ int read_image_result = 0;
+
+ if (sizeof(swp_entry_t) != sizeof(long)) {
+ printk(KERN_WARNING "TuxOnIce: The size of swp_entry_t != size"
+ " of long. Please report this!\n");
+ return 1;
+ }
+
+ if (!resume_file[0])
+ printk(KERN_WARNING "TuxOnIce: "
+ "You need to use a resume= command line parameter to "
+ "tell TuxOnIce where to look for an image.\n");
+
+ toi_activate_storage(0);
+
+ if (!(test_toi_state(TOI_RESUME_DEVICE_OK)) &&
+ !toi_attempt_to_parse_resume_device(0)) {
+ /*
+ * Without a usable storage device we can do nothing -
+ * even if noresume is given
+ */
+
+ if (!toiNumAllocators)
+ printk(KERN_ALERT "TuxOnIce: "
+ "No storage allocators have been registered.\n");
+ else
+ printk(KERN_ALERT "TuxOnIce: "
+ "Missing or invalid storage location "
+ "(resume= parameter). Please correct and "
+ "rerun lilo (or equivalent) before "
+ "hibernating.\n");
+ toi_deactivate_storage(0);
+ return 1;
+ }
+
+ if (allocate_bitmaps())
+ return 1;
+
+ read_image_result = read_pageset1(); /* non fatal error ignored */
+
+ if (test_toi_state(TOI_NORESUME_SPECIFIED))
+ clear_toi_state(TOI_NORESUME_SPECIFIED);
+
+ toi_deactivate_storage(0);
+
+ if (read_image_result)
+ return 1;
+
+ return 0;
+}
+
+/**
+ * prepare_restore_load_alt_image - save & restore alt image variables
+ *
+ * Save and restore the pageset1 maps, when loading an alternate image.
+ **/
+static void prepare_restore_load_alt_image(int prepare)
+{
+ static struct memory_bitmap *pageset1_map_save, *pageset1_copy_map_save;
+
+ if (prepare) {
+ pageset1_map_save = pageset1_map;
+ pageset1_map = NULL;
+ pageset1_copy_map_save = pageset1_copy_map;
+ pageset1_copy_map = NULL;
+ set_toi_state(TOI_LOADING_ALT_IMAGE);
+ toi_reset_alt_image_pageset2_pfn();
+ } else {
+ toi_free_bitmap(&pageset1_map);
+ pageset1_map = pageset1_map_save;
+ toi_free_bitmap(&pageset1_copy_map);
+ pageset1_copy_map = pageset1_copy_map_save;
+ clear_toi_state(TOI_NOW_RESUMING);
+ clear_toi_state(TOI_LOADING_ALT_IMAGE);
+ }
+}
+
+/**
+ * do_toi_step - perform a step in hibernating or resuming
+ *
+ * Perform a step in hibernating or resuming an image. This abstraction
+ * is in preparation for implementing cluster support, and perhaps replacing
+ * uswsusp too (haven't looked whether that's possible yet).
+ **/
+int do_toi_step(int step)
+{
+ switch (step) {
+ case STEP_HIBERNATE_PREPARE_IMAGE:
+ return do_prepare_image();
+ case STEP_HIBERNATE_SAVE_IMAGE:
+ return do_save_image();
+ case STEP_HIBERNATE_POWERDOWN:
+ return do_post_image_write();
+ case STEP_RESUME_CAN_RESUME:
+ return do_check_can_resume();
+ case STEP_RESUME_LOAD_PS1:
+ return do_load_atomic_copy();
+ case STEP_RESUME_DO_RESTORE:
+ /*
+ * If we succeed, this doesn't return.
+ * Instead, we return from do_save_image() in the
+ * hibernated kernel.
+ */
+ return toi_atomic_restore();
+ case STEP_RESUME_ALT_IMAGE:
+ printk(KERN_INFO "Trying to resume alternate image.\n");
+ toi_in_hibernate = 0;
+ save_restore_alt_param(SAVE, NOQUIET);
+ prepare_restore_load_alt_image(1);
+ if (!do_check_can_resume()) {
+ printk(KERN_INFO "Nothing to resume from.\n");
+ goto out;
+ }
+ if (!do_load_atomic_copy())
+ toi_atomic_restore();
+
+ printk(KERN_INFO "Failed to load image.\n");
+out:
+ prepare_restore_load_alt_image(0);
+ save_restore_alt_param(RESTORE, NOQUIET);
+ break;
+ case STEP_CLEANUP:
+ do_cleanup(1, 0);
+ break;
+ case STEP_QUIET_CLEANUP:
+ do_cleanup(0, 0);
+ break;
+ }
+
+ return 0;
+}
+
+/* -- Functions for kickstarting a hibernate or resume --- */
+
+/**
+ * toi_try_resume - try to do the steps in resuming
+ *
+ * Check if we have an image and if so try to resume. Clear the status
+ * flags too.
+ **/
+void toi_try_resume(void)
+{
+ set_toi_state(TOI_TRYING_TO_RESUME);
+ resume_attempted = 1;
+
+ current->flags |= PF_MEMALLOC;
+ toi_start_other_threads();
+
+ if (do_toi_step(STEP_RESUME_CAN_RESUME) &&
+ !do_toi_step(STEP_RESUME_LOAD_PS1))
+ do_toi_step(STEP_RESUME_DO_RESTORE);
+
+ toi_stop_other_threads();
+ do_cleanup(0, 0);
+
+ current->flags &= ~PF_MEMALLOC;
+
+ clear_toi_state(TOI_IGNORE_LOGLEVEL);
+ clear_toi_state(TOI_TRYING_TO_RESUME);
+ clear_toi_state(TOI_NOW_RESUMING);
+}
+
+/**
+ * toi_sys_power_disk_try_resume - wrapper calling toi_try_resume
+ *
+ * Wrapper for when toi_try_resume is called from the swsusp resume path,
+ * rather than from echo > /sys/power/tuxonice/do_resume.
+ **/
+static void toi_sys_power_disk_try_resume(void)
+{
+ resume_attempted = 1;
+
+ /*
+ * There's a comment in kernel/power/disk.c that indicates
+ * we should be able to use mutex_lock_nested below. That
+ * doesn't seem to cut it, though, so let's just turn lockdep
+ * off for now.
+ */
+ lockdep_off();
+
+ if (toi_start_anything(SYSFS_RESUMING))
+ goto out;
+
+ toi_try_resume();
+
+ /*
+ * For initramfs, we have to clear the boot time
+ * flag after trying to resume
+ */
+ clear_toi_state(TOI_BOOT_TIME);
+
+ toi_finish_anything(SYSFS_RESUMING);
+out:
+ lockdep_on();
+}
+
+/**
+ * toi_try_hibernate - try to start a hibernation cycle
+ *
+ * Start a hibernation cycle, coming in from either
+ * echo > /sys/power/tuxonice/do_suspend
+ *
+ * or
+ *
+ * echo disk > /sys/power/state
+ *
+ * In the latter case, we come in without pm_sem taken; in the
+ * former, it has been taken.
+ **/
+int toi_try_hibernate(void)
+{
+ int result = 0, sys_power_disk = 0, retries = 0;
+
+ if (!mutex_is_locked(&tuxonice_in_use)) {
+ /* Came in via /sys/power/disk */
+ if (toi_start_anything(SYSFS_HIBERNATING))
+ return -EBUSY;
+ sys_power_disk = 1;
+ }
+
+ current->flags |= PF_MEMALLOC;
+
+ if (test_toi_state(TOI_CLUSTER_MODE)) {
+ toi_initiate_cluster_hibernate();
+ goto out;
+ }
+
+prepare:
+ result = do_toi_step(STEP_HIBERNATE_PREPARE_IMAGE);
+
+ if (result)
+ goto out;
+
+ if (test_action_state(TOI_FREEZER_TEST))
+ goto out_restore_gfp_mask;
+
+ result = do_toi_step(STEP_HIBERNATE_SAVE_IMAGE);
+
+ if (test_result_state(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL)) {
+ if (retries < 2) {
+ do_cleanup(0, 1);
+ retries++;
+ clear_result_state(TOI_ABORTED);
+ extra_pd1_pages_allowance = extra_pd1_pages_used + 500;
+ printk(KERN_INFO "Automatically adjusting the extra"
+ " pages allowance to %ld and restarting.\n",
+ extra_pd1_pages_allowance);
+ pm_restore_gfp_mask();
+ goto prepare;
+ }
+
+ printk(KERN_INFO "Adjusted extra pages allowance twice and "
+ "still couldn't hibernate successfully. Giving up.");
+ }
+
+ /* This code runs at resume time too! */
+ if (!result && toi_in_hibernate)
+ result = do_toi_step(STEP_HIBERNATE_POWERDOWN);
+
+out_restore_gfp_mask:
+ pm_restore_gfp_mask();
+out:
+ do_cleanup(1, 0);
+ current->flags &= ~PF_MEMALLOC;
+
+ if (sys_power_disk)
+ toi_finish_anything(SYSFS_HIBERNATING);
+
+ return result;
+}
+
+/*
+ * channel_no: If !0, -c <channel_no> is added to args (userui).
+ */
+int toi_launch_userspace_program(char *command, int channel_no,
+ int wait, int debug)
+{
+ int retval;
+ static char *envp[] = {
+ "HOME=/",
+ "TERM=linux",
+ "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
+ NULL };
+ static char *argv[] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
+ };
+ char *channel = NULL;
+ int arg = 0, size;
+ char test_read[255];
+ char *orig_posn = command;
+
+ if (!strlen(orig_posn))
+ return 1;
+
+ if (channel_no) {
+ channel = toi_kzalloc(4, 6, GFP_KERNEL);
+ if (!channel) {
+ printk(KERN_INFO "Failed to allocate memory in "
+ "preparing to launch userspace program.\n");
+ return 1;
+ }
+ }
+
+ /* Up to 6 args supported */
+ while (arg < 6) {
+ sscanf(orig_posn, "%s", test_read);
+ size = strlen(test_read);
+ if (!(size))
+ break;
+ argv[arg] = toi_kzalloc(5, size + 1, TOI_ATOMIC_GFP);
+ strcpy(argv[arg], test_read);
+ orig_posn += size + 1;
+ *test_read = 0;
+ arg++;
+ }
+
+ if (channel_no) {
+ sprintf(channel, "-c%d", channel_no);
+ argv[arg] = channel;
+ } else
+ arg--;
+
+ if (debug) {
+ argv[++arg] = toi_kzalloc(5, 8, TOI_ATOMIC_GFP);
+ strcpy(argv[arg], "--debug");
+ }
+
+ retval = call_usermodehelper(argv[0], argv, envp, wait);
+
+ /*
+ * If the program reports an error, retval = 256. Don't complain
+ * about that here.
+ */
+ if (retval && retval != 256)
+ printk(KERN_ERR "Failed to launch userspace program '%s': "
+ "Error %d\n", command, retval);
+
+ {
+ int i;
+ for (i = 0; i < arg; i++)
+ if (argv[i] && argv[i] != channel)
+ toi_kfree(5, argv[i], sizeof(*argv[i]));
+ }
+
+ toi_kfree(4, channel, sizeof(*channel));
+
+ return retval;
+}
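+
+/*
+ * Example call (a sketch only - the binary path and wait flag are
+ * illustrative, not mandated by this code):
+ *
+ *   toi_launch_userspace_program("/usr/sbin/tuxoniceui_text", 1,
+ *                                UMH_WAIT_PROC, 0);
+ */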
+
+/*
+ * This array contains entries that are automatically registered at
+ * boot. Modules and the console code register their own entries separately.
+ */
+static struct toi_sysfs_data sysfs_params[] = {
+ SYSFS_LONG("extra_pages_allowance", SYSFS_RW,
+ &extra_pd1_pages_allowance, 0, LONG_MAX, 0),
+ SYSFS_CUSTOM("image_exists", SYSFS_RW, image_exists_read,
+ image_exists_write, SYSFS_NEEDS_SM_FOR_BOTH, NULL),
+ SYSFS_STRING("resume", SYSFS_RW, resume_file, 255,
+ SYSFS_NEEDS_SM_FOR_WRITE,
+ attempt_to_parse_resume_device2),
+ SYSFS_STRING("alt_resume_param", SYSFS_RW, alt_resume_param, 255,
+ SYSFS_NEEDS_SM_FOR_WRITE,
+ attempt_to_parse_alt_resume_param),
+ SYSFS_CUSTOM("debug_info", SYSFS_READONLY, get_toi_debug_info, NULL, 0,
+ NULL),
+ SYSFS_BIT("ignore_rootfs", SYSFS_RW, &toi_bkd.toi_action,
+ TOI_IGNORE_ROOTFS, 0),
+ SYSFS_LONG("image_size_limit", SYSFS_RW, &image_size_limit, -2,
+ INT_MAX, 0),
+ SYSFS_UL("last_result", SYSFS_RW, &toi_result, 0, 0, 0),
+ SYSFS_BIT("no_multithreaded_io", SYSFS_RW, &toi_bkd.toi_action,
+ TOI_NO_MULTITHREADED_IO, 0),
+ SYSFS_BIT("no_flusher_thread", SYSFS_RW, &toi_bkd.toi_action,
+ TOI_NO_FLUSHER_THREAD, 0),
+ SYSFS_BIT("full_pageset2", SYSFS_RW, &toi_bkd.toi_action,
+ TOI_PAGESET2_FULL, 0),
+ SYSFS_BIT("reboot", SYSFS_RW, &toi_bkd.toi_action, TOI_REBOOT, 0),
+ SYSFS_BIT("replace_swsusp", SYSFS_RW, &toi_bkd.toi_action,
+ TOI_REPLACE_SWSUSP, 0),
+ SYSFS_STRING("resume_commandline", SYSFS_RW,
+ toi_bkd.toi_nosave_commandline, COMMAND_LINE_SIZE, 0,
+ NULL),
+ SYSFS_STRING("version", SYSFS_READONLY, TOI_CORE_VERSION, 0, 0, NULL),
+ SYSFS_BIT("freezer_test", SYSFS_RW, &toi_bkd.toi_action,
+ TOI_FREEZER_TEST, 0),
+ SYSFS_BIT("test_bio", SYSFS_RW, &toi_bkd.toi_action, TOI_TEST_BIO, 0),
+ SYSFS_BIT("test_filter_speed", SYSFS_RW, &toi_bkd.toi_action,
+ TOI_TEST_FILTER_SPEED, 0),
+ SYSFS_BIT("no_pageset2", SYSFS_RW, &toi_bkd.toi_action,
+ TOI_NO_PAGESET2, 0),
+ SYSFS_BIT("no_pageset2_if_unneeded", SYSFS_RW, &toi_bkd.toi_action,
+ TOI_NO_PS2_IF_UNNEEDED, 0),
+ SYSFS_STRING("binary_signature", SYSFS_READONLY,
+ tuxonice_signature, 9, 0, NULL),
+ SYSFS_INT("max_workers", SYSFS_RW, &toi_max_workers, 0, NR_CPUS, 0,
+ NULL),
+#ifdef CONFIG_KGDB
+ SYSFS_BIT("post_resume_breakpoint", SYSFS_RW, &toi_bkd.toi_action,
+ TOI_POST_RESUME_BREAKPOINT, 0),
+#endif
+ SYSFS_BIT("no_readahead", SYSFS_RW, &toi_bkd.toi_action,
+ TOI_NO_READAHEAD, 0),
+ SYSFS_BIT("trace_debug_on", SYSFS_RW, &toi_bkd.toi_action,
+ TOI_TRACE_DEBUG_ON, 0),
+#ifdef CONFIG_TOI_KEEP_IMAGE
+ SYSFS_BIT("keep_image", SYSFS_RW , &toi_bkd.toi_action, TOI_KEEP_IMAGE,
+ 0),
+#endif
+#ifdef CONFIG_TOI_INCREMENTAL
+ SYSFS_CUSTOM("pagestate", SYSFS_READONLY, get_toi_page_state, NULL, 0,
+ NULL),
+ SYSFS_BIT("incremental", SYSFS_RW, &toi_bkd.toi_action,
+ TOI_INCREMENTAL_IMAGE, 1),
+#endif
+};
+
+static struct toi_core_fns my_fns = {
+ .get_nonconflicting_page = __toi_get_nonconflicting_page,
+ .post_context_save = __toi_post_context_save,
+ .try_hibernate = toi_try_hibernate,
+ .try_resume = toi_sys_power_disk_try_resume,
+};
+
+/**
+ * core_load - initialisation of TuxOnIce core
+ *
+ * Initialise the core, beginning with sysfs. Checksum and so on are part of
+ * the core, but have their own initialisation routines because they either
+ * aren't compiled in all the time or have their own subdirectories.
+ **/
+static __init int core_load(void)
+{
+ int i,
+ numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data);
+
+ printk(KERN_INFO "TuxOnIce " TOI_CORE_VERSION
+ " (http://tuxonice.net)\n");
+
+ if (!hibernation_available()) {
+ printk(KERN_INFO "TuxOnIce disabled due to request for hibernation"
+ " to be disabled in this kernel.\n");
+ return 1;
+ }
+
+ if (toi_sysfs_init())
+ return 1;
+
+ for (i = 0; i < numfiles; i++)
+ toi_register_sysfs_file(tuxonice_kobj, &sysfs_params[i]);
+
+ toi_core_fns = &my_fns;
+
+ if (toi_alloc_init())
+ return 1;
+ if (toi_checksum_init())
+ return 1;
+ if (toi_usm_init())
+ return 1;
+ if (toi_ui_init())
+ return 1;
+ if (toi_poweroff_init())
+ return 1;
+ if (toi_cluster_init())
+ return 1;
+ if (toi_cbw_init())
+ return 1;
+
+ return 0;
+}
+
+late_initcall(core_load);
diff --git a/kernel/power/tuxonice_incremental.c b/kernel/power/tuxonice_incremental.c
new file mode 100644
index 000000000..c5a09789e
--- /dev/null
+++ b/kernel/power/tuxonice_incremental.c
@@ -0,0 +1,402 @@
+/*
+ * kernel/power/tuxonice_incremental.c
+ *
+ * Copyright (C) 2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * This file contains routines related to storing incremental images - that
+ * is, retaining an image after an initial cycle and then storing incremental
+ * changes on subsequent hibernations.
+ *
+ * Based in part on...
+ *
+ * Debug helper to dump the current kernel pagetables of the system
+ * so that we can see what the various memory ranges are set to.
+ *
+ * (C) Copyright 2008 Intel Corporation
+ *
+ * Author: Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/mm.h>
+#include <linux/tuxonice.h>
+#include <linux/sched.h>
+#include <asm/pgtable.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/page.h>
+#include "tuxonice_pageflags.h"
+#include "tuxonice_builtin.h"
+#include "power.h"
+
+int toi_do_incremental_initcall;
+
+extern void kdb_init(int level);
+extern noinline void kgdb_breakpoint(void);
+
+#undef pr_debug
+#if 0
+#define pr_debug(a, b...) do { printk(a, ##b); } while(0)
+#else
+#define pr_debug(a, b...) do { } while(0)
+#endif
+
+/* Multipliers for offsets within the PTEs */
+#define PTE_LEVEL_MULT (PAGE_SIZE)
+#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
+#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT)
+#define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
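+
+/*
+ * For example, on x86_64 with 4KB pages and 512 entries per table level,
+ * these multipliers work out to 4KB, 2MB, 1GB and 512GB of virtual address
+ * space covered per entry at each level respectively.
+ */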
+
+/*
+ * This function gets called for each page table entry visited. In the
+ * original ptdump code it printed the range collected so far; here we
+ * instead mark the page table page containing the entry as untracked.
+ */
+static void note_page(void *addr)
+{
+ static struct page *lastpage;
+ struct page *page;
+
+ page = virt_to_page(addr);
+
+ if (page != lastpage) {
+ unsigned int level;
+ pte_t *pte = lookup_address((unsigned long) addr, &level);
+ struct page *pt_page2 = pte_page(*pte);
+ //debug("Note page %p (=> %p => %p|%ld).\n", addr, pte, pt_page2, page_to_pfn(pt_page2));
+ SetPageTOI_Untracked(pt_page2);
+ lastpage = page;
+ }
+}
+
+static void walk_pte_level(pmd_t addr)
+{
+ int i;
+ pte_t *start;
+
+ start = (pte_t *) pmd_page_vaddr(addr);
+ for (i = 0; i < PTRS_PER_PTE; i++) {
+ note_page(start);
+ start++;
+ }
+}
+
+#if PTRS_PER_PMD > 1
+
+static void walk_pmd_level(pud_t addr)
+{
+ int i;
+ pmd_t *start;
+
+ start = (pmd_t *) pud_page_vaddr(addr);
+ for (i = 0; i < PTRS_PER_PMD; i++) {
+ if (!pmd_none(*start)) {
+ if (pmd_large(*start) || !pmd_present(*start))
+ note_page(start);
+ else
+ walk_pte_level(*start);
+ } else
+ note_page(start);
+ start++;
+ }
+}
+
+#else
+#define walk_pmd_level(a) walk_pte_level(__pmd(pud_val(a)))
+#define pud_large(a) pmd_large(__pmd(pud_val(a)))
+#define pud_none(a) pmd_none(__pmd(pud_val(a)))
+#endif
+
+#if PTRS_PER_PUD > 1
+
+static void walk_pud_level(pgd_t addr)
+{
+ int i;
+ pud_t *start;
+
+ start = (pud_t *) pgd_page_vaddr(addr);
+
+ for (i = 0; i < PTRS_PER_PUD; i++) {
+ if (!pud_none(*start)) {
+ if (pud_large(*start) || !pud_present(*start))
+ note_page(start);
+ else
+ walk_pmd_level(*start);
+ } else
+ note_page(start);
+
+ start++;
+ }
+}
+
+#else
+#define walk_pud_level(a) walk_pmd_level(__pud(pgd_val(a)))
+#define pgd_large(a) pud_large(__pud(pgd_val(a)))
+#define pgd_none(a) pud_none(__pud(pgd_val(a)))
+#endif
+
+/*
+ * Not static in the original at the time of writing, so needs renaming here.
+ */
+static void toi_ptdump_walk_pgd_level(pgd_t *pgd)
+{
+#ifdef CONFIG_X86_64
+ pgd_t *start = (pgd_t *) &init_level4_pgt;
+#else
+ pgd_t *start = swapper_pg_dir;
+#endif
+ int i;
+ if (pgd) {
+ start = pgd;
+ }
+
+ for (i = 0; i < PTRS_PER_PGD; i++) {
+ if (!pgd_none(*start)) {
+ if (pgd_large(*start) || !pgd_present(*start))
+ note_page(start);
+ else
+ walk_pud_level(*start);
+ } else
+ note_page(start);
+
+ start++;
+ }
+
+ /* Flush out the last page */
+ note_page(start);
+}
+
+#ifdef CONFIG_PARAVIRT
+extern struct pv_info pv_info;
+
+static void toi_set_paravirt_ops_untracked(void) {
+ int i;
+
+ unsigned long pvpfn = page_to_pfn(virt_to_page(__parainstructions)),
+ pvpfn_end = page_to_pfn(virt_to_page(__parainstructions_end));
+ //debug(KERN_EMERG ".parainstructions goes from pfn %ld to %ld.\n", pvpfn, pvpfn_end);
+ for (i = pvpfn; i <= pvpfn_end; i++) {
+ SetPageTOI_Untracked(pfn_to_page(i));
+ }
+}
+#else
+#define toi_set_paravirt_ops_untracked() do { } while(0)
+#endif
+
+extern void toi_mark_per_cpus_pages_untracked(void);
+
+void toi_untrack_stack(unsigned long *stack)
+{
+ int i;
+ struct page *stack_page = virt_to_page(stack);
+
+ for (i = 0; i < (1 << THREAD_SIZE_ORDER); i++) {
+ pr_debug("Untrack stack page %p.\n", page_address(stack_page + i));
+ SetPageTOI_Untracked(stack_page + i);
+ }
+}
+void toi_untrack_process(struct task_struct *p)
+{
+ SetPageTOI_Untracked(virt_to_page(p));
+ pr_debug("Untrack process %d page %p.\n", p->pid, page_address(virt_to_page(p)));
+
+ toi_untrack_stack(p->stack);
+}
+
+void toi_generate_untracked_map(void)
+{
+ struct task_struct *p, *t;
+ struct page *page;
+ pte_t *pte;
+ int i;
+ unsigned int level;
+ static int been_here = 0;
+
+ if (been_here)
+ return;
+
+ been_here = 1;
+
+ /* Pagetable pages */
+ toi_ptdump_walk_pgd_level(NULL);
+
+ /* Printk buffer - not normally needed but can be helpful for debugging. */
+ //toi_set_logbuf_untracked();
+
+ /* Paravirt ops */
+ toi_set_paravirt_ops_untracked();
+
+ /* Task structs and stacks */
+ for_each_process_thread(p, t) {
+ toi_untrack_process(p);
+ //toi_untrack_stack((unsigned long *) t->thread.sp);
+ }
+
+ for (i = 0; i < NR_CPUS; i++) {
+ struct task_struct *idle = idle_task(i);
+
+ if (idle) {
+ pr_debug("Untrack idle process for CPU %d.\n", i);
+ toi_untrack_process(idle);
+ }
+
+ /* IRQ stack */
+ pr_debug("Untrack IRQ stack for CPU %d.\n", i);
+ toi_untrack_stack((unsigned long *)per_cpu(irq_stack_ptr, i));
+ }
+
+ /* Per CPU data */
+ //pr_debug("Untracking per CPU variable pages.\n");
+ toi_mark_per_cpus_pages_untracked();
+
+ /* Init stack - for bringing up secondary CPUs */
+ page = virt_to_page(init_stack);
+ for (i = 0; i < DIV_ROUND_UP(sizeof(init_stack), PAGE_SIZE); i++) {
+ SetPageTOI_Untracked(page + i);
+ }
+
+ pte = lookup_address((unsigned long) &mmu_cr4_features, &level);
+ SetPageTOI_Untracked(pte_page(*pte));
+ SetPageTOI_Untracked(virt_to_page(trampoline_cr4_features));
+}
+
+/**
+ * toi_reset_dirtiness_one
+ */
+
+void toi_reset_dirtiness_one(unsigned long pfn, int verbose)
+{
+ struct page *page = pfn_to_page(pfn);
+
+ /**
+ * Don't worry about whether the Dirty flag is
+ * already set. If this is our first call, it
+ * won't be.
+ */
+
+ preempt_disable();
+
+ ClearPageTOI_Dirty(page);
+ SetPageTOI_RO(page);
+ if (verbose)
+ printk(KERN_EMERG "Making page %ld (%p|%p) read only.\n", pfn, page, page_address(page));
+
+ set_memory_ro((unsigned long) page_address(page), 1);
+
+ preempt_enable();
+}
+
+/**
+ * TuxOnIce's incremental image support works by marking all memory apart from
+ * the page tables read-only, then in the page-faults that result enabling
+ * writing if appropriate and flagging the page as dirty. Free pages are also
+ * marked as dirty and not protected so that if allocated, they will be included
+ * in the image without further processing.
+ *
+ * toi_reset_dirtiness is called when an image exists and incremental images are
+ * enabled, and each time we resume thereafter. It is not invoked on a fresh boot.
+ *
+ * This routine should be called from a single-cpu-running context to avoid races in setting
+ * page dirty/read only flags.
+ *
+ * TODO: Make "it is not invoked on a fresh boot" true when I've finished developing it!
+ *
+ * TODO: Consider Xen paravirt guest boot issues. See arch/x86/mm/pageattr.c.
+ **/
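+
+/*
+ * A minimal sketch of the write-fault side this relies on (illustrative
+ * only - the real hook lives in the architecture's fault handling path):
+ *
+ *   if (PageTOI_RO(page)) {
+ *           SetPageTOI_Dirty(page);
+ *           set_memory_rw((unsigned long) page_address(page), 1);
+ *   }
+ */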
+
+int toi_reset_dirtiness(int verbose)
+{
+ struct zone *zone;
+ unsigned long loop;
+ int allocated_map = 0;
+
+ toi_generate_untracked_map();
+
+ if (!free_map) {
+ if (toi_alloc_bitmap(&free_map))
+ return -ENOMEM;
+ allocated_map = 1;
+ }
+
+ toi_generate_free_page_map();
+
+ pr_debug(KERN_EMERG "Reset dirtiness.\n");
+ for_each_populated_zone(zone) {
+ // 64 bit only. No need to worry about highmem.
+ for (loop = 0; loop < zone->spanned_pages; loop++) {
+ unsigned long pfn = zone->zone_start_pfn + loop;
+ struct page *page;
+ int chunk_size;
+
+ if (!pfn_valid(pfn)) {
+ continue;
+ }
+
+ chunk_size = toi_size_of_free_region(zone, pfn);
+ if (chunk_size) {
+ loop += chunk_size - 1;
+ continue;
+ }
+
+ page = pfn_to_page(pfn);
+
+ if (PageNosave(page) || !saveable_page(zone, pfn)) {
+ continue;
+ }
+
+ if (PageTOI_Untracked(page)) {
+ continue;
+ }
+
+ /**
+ * Do we need to (re)protect the page?
+ * If it is already protected (PageTOI_RO), there is
+ * nothing to do - skip the following.
+ * If it is marked as dirty (PageTOI_Dirty), it was
+ * either free and has been allocated or has been
+ * written to and marked dirty. Reset the dirty flag
+ * and (re)apply the protection.
+ */
+ if (!PageTOI_RO(page)) {
+ toi_reset_dirtiness_one(pfn, verbose);
+ }
+ }
+ }
+
+ pr_debug("Done resetting dirtiness.\n");
+
+ if (allocated_map) {
+ toi_free_bitmap(&free_map);
+ }
+ return 0;
+}
+
+static int toi_reset_dirtiness_initcall(void)
+{
+ if (toi_do_incremental_initcall) {
+ pr_info("TuxOnIce: Enabling dirty page tracking.\n");
+ toi_reset_dirtiness(0);
+ }
+ return 1;
+}
+extern void toi_generate_untracked_map(void);
+
+// Leave early_initcall for pages to register untracked sections.
+early_initcall(toi_reset_dirtiness_initcall);
+
+static int __init toi_incremental_initcall_setup(char *str)
+{
+ int value;
+
+ if (sscanf(str, "=%d", &value) && value)
+ toi_do_incremental_initcall = value;
+
+ return 1;
+}
+__setup("toi_incremental_initcall", toi_incremental_initcall_setup);
diff --git a/kernel/power/tuxonice_io.c b/kernel/power/tuxonice_io.c
new file mode 100644
index 000000000..91b0c4fd0
--- /dev/null
+++ b/kernel/power/tuxonice_io.c
@@ -0,0 +1,1932 @@
+/*
+ * kernel/power/tuxonice_io.c
+ *
+ * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
+ * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr>
+ * Copyright (C) 2002-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * It contains high level IO routines for hibernating.
+ *
+ */
+
+#include <linux/suspend.h>
+#include <linux/version.h>
+#include <linux/utsname.h>
+#include <linux/mount.h>
+#include <linux/highmem.h>
+#include <linux/kthread.h>
+#include <linux/cpu.h>
+#include <linux/fs_struct.h>
+#include <linux/bio.h>
+#include <linux/fs_uuid.h>
+#include <linux/kmod.h>
+#include <asm/tlbflush.h>
+
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_pageflags.h"
+#include "tuxonice_io.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_storage.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice_extent.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_checksum.h"
+#include "tuxonice_alloc.h"
+char alt_resume_param[256];
+
+/* Version read from image header at resume */
+static int toi_image_header_version;
+
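+/*
+ * Read VAR from the image header via the active allocator, but only if the
+ * header being read is new enough (version >= VERS) to contain it. On an
+ * I/O error the hibernate is aborted and ERR_ACT is executed, e.g.
+ * read_if_version(3, dev_t, "uuid dev_t field", return -EIO);
+ */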
+#define read_if_version(VERS, VAR, DESC, ERR_ACT) do { \
+ if (likely(toi_image_header_version >= VERS)) \
+ if (toiActiveAllocator->rw_header_chunk(READ, NULL, \
+ (char *) &VAR, sizeof(VAR))) { \
+ abort_hibernate(TOI_FAILED_IO, "Failed to read DESC."); \
+ ERR_ACT; \
+ } \
+} while (0)
+
+/* Variables shared between threads and updated under the mutex */
+static int io_write, io_finish_at, io_base, io_barmax, io_pageset, io_result;
+static int io_index, io_nextupdate, io_pc, io_pc_step;
+static DEFINE_MUTEX(io_mutex);
+static DEFINE_PER_CPU(struct page *, last_sought);
+static DEFINE_PER_CPU(struct page *, last_high_page);
+static DEFINE_PER_CPU(char *, checksum_locn);
+static DEFINE_PER_CPU(struct pbe *, last_low_page);
+static atomic_t io_count;
+atomic_t toi_io_workers;
+
+static int using_flusher;
+
+DECLARE_WAIT_QUEUE_HEAD(toi_io_queue_flusher);
+
+int toi_bio_queue_flusher_should_finish;
+
+int toi_max_workers;
+
+static char *image_version_error = "The image header version is newer than " \
+ "this kernel supports.";
+
+struct toi_module_ops *first_filter;
+
+static atomic_t toi_num_other_threads;
+static DECLARE_WAIT_QUEUE_HEAD(toi_worker_wait_queue);
+enum toi_worker_commands {
+ TOI_IO_WORKER_STOP,
+ TOI_IO_WORKER_RUN,
+ TOI_IO_WORKER_EXIT
+};
+static enum toi_worker_commands toi_worker_command;
+
+/**
+ * toi_attempt_to_parse_resume_device - determine if we can hibernate
+ *
+ * Can we hibernate, using the current resume= parameter?
+ **/
+int toi_attempt_to_parse_resume_device(int quiet)
+{
+ struct list_head *Allocator;
+ struct toi_module_ops *thisAllocator;
+ int result, returning = 0;
+
+ if (toi_activate_storage(0))
+ return 0;
+
+ toiActiveAllocator = NULL;
+ clear_toi_state(TOI_RESUME_DEVICE_OK);
+ clear_toi_state(TOI_CAN_RESUME);
+ clear_result_state(TOI_ABORTED);
+
+ if (!toiNumAllocators) {
+ if (!quiet)
+ printk(KERN_INFO "TuxOnIce: No storage allocators have "
+ "been registered. Hibernating will be "
+ "disabled.\n");
+ goto cleanup;
+ }
+
+ list_for_each(Allocator, &toiAllocators) {
+ thisAllocator = list_entry(Allocator, struct toi_module_ops,
+ type_list);
+
+ /*
+ * Not sure why you'd want to disable an allocator, but
+ * we should honour the flag if we're providing it
+ */
+ if (!thisAllocator->enabled)
+ continue;
+
+ result = thisAllocator->parse_sig_location(
+ resume_file, (toiNumAllocators == 1),
+ quiet);
+
+ switch (result) {
+ case -EINVAL:
+ /* For this allocator, but not a valid
+ * configuration. Error already printed. */
+ goto cleanup;
+
+ case 0:
+ /* For this allocator and valid. */
+ toiActiveAllocator = thisAllocator;
+
+ set_toi_state(TOI_RESUME_DEVICE_OK);
+ set_toi_state(TOI_CAN_RESUME);
+ returning = 1;
+ goto cleanup;
+ }
+ }
+ if (!quiet)
+ printk(KERN_INFO "TuxOnIce: No matching enabled allocator "
+ "found. Resuming disabled.\n");
+cleanup:
+ toi_deactivate_storage(0);
+ return returning;
+}
+
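+/**
+ * attempt_to_parse_resume_device2 - re-parse the resume device
+ *
+ * Wrapper around toi_attempt_to_parse_resume_device() that calls
+ * toi_prepare_usm() beforehand and toi_cleanup_usm() afterwards.
+ */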
+void attempt_to_parse_resume_device2(void)
+{
+ toi_prepare_usm();
+ toi_attempt_to_parse_resume_device(0);
+ toi_cleanup_usm();
+}
+
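+/**
+ * save_restore_alt_param - switch between resume_file and alt_resume_param
+ * @replace: SAVE to substitute alt_resume_param for resume_file (saving the
+ * original value and TuxOnIce state), RESTORE to put the saved values back.
+ * @quiet: Passed on to toi_attempt_to_parse_resume_device().
+ *
+ * The resume device is re-parsed after each switch.
+ */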
+void save_restore_alt_param(int replace, int quiet)
+{
+ static char resume_param_save[255];
+ static unsigned long toi_state_save;
+
+ if (replace) {
+ toi_state_save = toi_state;
+ strcpy(resume_param_save, resume_file);
+ strcpy(resume_file, alt_resume_param);
+ } else {
+ strcpy(resume_file, resume_param_save);
+ toi_state = toi_state_save;
+ }
+ toi_attempt_to_parse_resume_device(quiet);
+}
+
+void attempt_to_parse_alt_resume_param(void)
+{
+ int ok = 0;
+
+ /* Temporarily set resume_param to the poweroff value */
+ if (!strlen(alt_resume_param))
+ return;
+
+ printk(KERN_INFO "=== Trying Poweroff Resume2 ===\n");
+ save_restore_alt_param(SAVE, NOQUIET);
+ if (test_toi_state(TOI_CAN_RESUME))
+ ok = 1;
+
+ printk(KERN_INFO "=== Done ===\n");
+ save_restore_alt_param(RESTORE, QUIET);
+
+ /* If not ok, clear the string */
+ if (ok)
+ return;
+
+ printk(KERN_INFO "Can't resume from that location; clearing "
+ "alt_resume_param.\n");
+ alt_resume_param[0] = '\0';
+}
+
+/**
+ * noresume_reset_modules - reset data structures in case of non resuming
+ *
+ * When we read the start of an image, modules (and especially the
+ * active allocator) might need to reset data structures if we
+ * decide to remove the image rather than resuming from it.
+ **/
+static void noresume_reset_modules(void)
+{
+ struct toi_module_ops *this_filter;
+
+ list_for_each_entry(this_filter, &toi_filters, type_list)
+ if (this_filter->noresume_reset)
+ this_filter->noresume_reset();
+
+ if (toiActiveAllocator && toiActiveAllocator->noresume_reset)
+ toiActiveAllocator->noresume_reset();
+}
+
+/**
+ * fill_toi_header - fill the hibernate header structure
+ * @sh: Header data structure to be filled.
+ **/
+static int fill_toi_header(struct toi_header *sh)
+{
+ int i, error;
+
+ error = init_header((struct swsusp_info *) sh);
+ if (error)
+ return error;
+
+ sh->pagedir = pagedir1;
+ sh->pageset_2_size = pagedir2.size;
+ sh->param0 = toi_result;
+ sh->param1 = toi_bkd.toi_action;
+ sh->param2 = toi_bkd.toi_debug_state;
+ sh->param3 = toi_bkd.toi_default_console_level;
+ sh->root_fs = current->fs->root.mnt->mnt_sb->s_dev;
+ for (i = 0; i < 4; i++)
+ sh->io_time[i/2][i%2] = toi_bkd.toi_io_time[i/2][i%2];
+ sh->bkd = boot_kernel_data_buffer;
+ return 0;
+}
+
+/**
+ * rw_init_modules - initialize modules
+ * @rw: Whether we are reading or writing an image.
+ * @which: Section of the image being processed.
+ *
+ * Iterate over modules, preparing the ones that will be used to read or write
+ * data.
+ **/
+static int rw_init_modules(int rw, int which)
+{
+ struct toi_module_ops *this_module;
+ /* Initialise page transformers */
+ list_for_each_entry(this_module, &toi_filters, type_list) {
+ if (!this_module->enabled)
+ continue;
+ if (this_module->rw_init && this_module->rw_init(rw, which)) {
+ abort_hibernate(TOI_FAILED_MODULE_INIT,
+ "Failed to initialize the %s filter.",
+ this_module->name);
+ return 1;
+ }
+ }
+
+ /* Initialise allocator */
+ if (toiActiveAllocator->rw_init(rw, which)) {
+ abort_hibernate(TOI_FAILED_MODULE_INIT,
+ "Failed to initialise the allocator.");
+ return 1;
+ }
+
+ /* Initialise other modules */
+ list_for_each_entry(this_module, &toi_modules, module_list) {
+ if (!this_module->enabled ||
+ this_module->type == FILTER_MODULE ||
+ this_module->type == WRITER_MODULE)
+ continue;
+ if (this_module->rw_init && this_module->rw_init(rw, which)) {
+ set_abort_result(TOI_FAILED_MODULE_INIT);
+ printk(KERN_INFO "Setting aborted flag due to module "
+ "init failure.\n");
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * rw_cleanup_modules - cleanup modules
+ * @rw: Whether we are reading or writing an image.
+ *
+ * Cleanup components after reading or writing a set of pages.
+ * Only the allocator may fail.
+ **/
+static int rw_cleanup_modules(int rw)
+{
+ struct toi_module_ops *this_module;
+ int result = 0;
+
+ /* Cleanup other modules */
+ list_for_each_entry(this_module, &toi_modules, module_list) {
+ if (!this_module->enabled ||
+ this_module->type == FILTER_MODULE ||
+ this_module->type == WRITER_MODULE)
+ continue;
+ if (this_module->rw_cleanup)
+ result |= this_module->rw_cleanup(rw);
+ }
+
+ /* Flush data and cleanup */
+ list_for_each_entry(this_module, &toi_filters, type_list) {
+ if (!this_module->enabled)
+ continue;
+ if (this_module->rw_cleanup)
+ result |= this_module->rw_cleanup(rw);
+ }
+
+ result |= toiActiveAllocator->rw_cleanup(rw);
+
+ return result;
+}
+
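+/**
+ * copy_page_from_orig_page - find where a page's data should be loaded
+ * @orig_page: Original page (or, for highmem, its pfn cast to a pointer).
+ * @is_high: Whether the original page is a highmem page.
+ *
+ * Walk the restore pbe chains (restore_pblist or restore_highmem_pblist),
+ * starting from per-cpu hints left by the previous lookup, then binary
+ * search within the matching pbe page for the entry whose orig_address
+ * matches and return its destination page. Aborts the hibernate if no
+ * entry is found.
+ */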
+static struct page *copy_page_from_orig_page(struct page *orig_page, int is_high)
+{
+ int index, min, max;
+ struct page *high_page = NULL,
+ **my_last_high_page = raw_cpu_ptr(&last_high_page),
+ **my_last_sought = raw_cpu_ptr(&last_sought);
+ struct pbe *this, **my_last_low_page = raw_cpu_ptr(&last_low_page);
+ void *compare;
+
+ if (is_high) {
+ if (*my_last_sought && *my_last_high_page &&
+ *my_last_sought < orig_page)
+ high_page = *my_last_high_page;
+ else
+ high_page = (struct page *) restore_highmem_pblist;
+ this = (struct pbe *) kmap(high_page);
+ compare = orig_page;
+ } else {
+ if (*my_last_sought && *my_last_low_page &&
+ *my_last_sought < orig_page)
+ this = *my_last_low_page;
+ else
+ this = restore_pblist;
+ compare = page_address(orig_page);
+ }
+
+ *my_last_sought = orig_page;
+
+ /* Locate page containing pbe */
+ while (this[PBES_PER_PAGE - 1].next &&
+ this[PBES_PER_PAGE - 1].orig_address < compare) {
+ if (is_high) {
+ struct page *next_high_page = (struct page *)
+ this[PBES_PER_PAGE - 1].next;
+ kunmap(high_page);
+ this = kmap(next_high_page);
+ high_page = next_high_page;
+ } else
+ this = this[PBES_PER_PAGE - 1].next;
+ }
+
+ /* Do a binary search within the page */
+ min = 0;
+ max = PBES_PER_PAGE;
+ index = PBES_PER_PAGE / 2;
+ while (max - min) {
+ if (!this[index].orig_address ||
+ this[index].orig_address > compare)
+ max = index;
+ else if (this[index].orig_address == compare) {
+ if (is_high) {
+ struct page *page = this[index].address;
+ *my_last_high_page = high_page;
+ kunmap(high_page);
+ return page;
+ }
+ *my_last_low_page = this;
+ return virt_to_page(this[index].address);
+ } else
+ min = index;
+ index = ((max + min) / 2);
+ };
+
+ if (is_high)
+ kunmap(high_page);
+
+ abort_hibernate(TOI_FAILED_IO, "Failed to get destination page for"
+ " orig page %p. This[min].orig_address=%p.\n", orig_page,
+ this[index].orig_address);
+ return NULL;
+}
+
+/**
+ * write_next_page - write the next page in a pageset
+ * @data_pfn: The pfn where the next data to write is located.
+ * @my_io_index: The index of the page in the pageset.
+ * @write_pfn: The pfn number to write in the image (where the data belongs).
+ *
+ * Get the pfn of the next page to write, map the page if necessary and do the
+ * write.
+ **/
+static int write_next_page(unsigned long *data_pfn, int *my_io_index,
+ unsigned long *write_pfn)
+{
+ struct page *page;
+ char **my_checksum_locn = raw_cpu_ptr(&checksum_locn);
+ int result = 0, was_present;
+
+ *data_pfn = memory_bm_next_pfn(io_map, 0);
+
+ /* Another thread could have beaten us to it. */
+ if (*data_pfn == BM_END_OF_MAP) {
+ if (atomic_read(&io_count)) {
+ printk(KERN_INFO "Ran out of pfns but io_count is "
+ "still %d.\n", atomic_read(&io_count));
+ BUG();
+ }
+ mutex_unlock(&io_mutex);
+ return -ENODATA;
+ }
+
+ *my_io_index = io_finish_at - atomic_sub_return(1, &io_count);
+
+ memory_bm_clear_bit(io_map, 0, *data_pfn);
+ page = pfn_to_page(*data_pfn);
+
+ was_present = kernel_page_present(page);
+ if (!was_present)
+ kernel_map_pages(page, 1, 1);
+
+ if (io_pageset == 1)
+ *write_pfn = memory_bm_next_pfn(pageset1_map, 0);
+ else {
+ *write_pfn = *data_pfn;
+ *my_checksum_locn = tuxonice_get_next_checksum();
+ }
+
+ TOI_TRACE_DEBUG(*data_pfn, "_PS%d_write %d", io_pageset, *my_io_index);
+
+ mutex_unlock(&io_mutex);
+
+ if (io_pageset == 2 && tuxonice_calc_checksum(page, *my_checksum_locn))
+ return 1;
+
+ result = first_filter->write_page(*write_pfn, TOI_PAGE, page,
+ PAGE_SIZE);
+
+ if (!was_present)
+ kernel_map_pages(page, 1, 0);
+
+ return result;
+}
+
+/**
+ * read_next_page - read the next page in a pageset
+ * @my_io_index: The index of the page in the pageset.
+ * @write_pfn: The pfn in which the data belongs.
+ *
+ * Read a page of the image into our buffer. It can happen (here and in the
+ * write routine) that threads don't get run until after other CPUs have done
+ * all the work. This was the cause of the long standing issue with
+ * occasionally getting -ENODATA errors at the end of reading the image. We
+ * therefore need to check there's actually a page to read before trying to
+ * retrieve one.
+ **/
+
+static int read_next_page(int *my_io_index, unsigned long *write_pfn,
+ struct page *buffer)
+{
+ unsigned int buf_size = PAGE_SIZE;
+ unsigned long left = atomic_read(&io_count);
+
+ if (!left)
+ return -ENODATA;
+
+ /* Start off assuming the page we read isn't resaved */
+ *my_io_index = io_finish_at - atomic_sub_return(1, &io_count);
+
+ mutex_unlock(&io_mutex);
+
+ /*
+ * Are we aborting? If so, don't submit any more I/O as
+ * resetting the resume_attempted flag (from ui.c) will
+ * clear the bdev flags, making this thread oops.
+ */
+ if (unlikely(test_toi_state(TOI_STOP_RESUME))) {
+ atomic_dec(&toi_io_workers);
+ if (!atomic_read(&toi_io_workers)) {
+ /*
+ * So we can be sure we'll have memory for
+ * marking that we haven't resumed.
+ */
+ rw_cleanup_modules(READ);
+ set_toi_state(TOI_IO_STOPPED);
+ }
+ while (1)
+ schedule();
+ }
+
+ /*
+ * See toi_bio_read_page in tuxonice_bio.c:
+ * read the next page in the image.
+ */
+ return first_filter->read_page(write_pfn, TOI_PAGE, buffer, &buf_size);
+}
+
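+/**
+ * use_read_page - copy a page just read into its final location
+ * @write_pfn: The pfn to which the data belongs.
+ * @buffer: The page holding the data that was read.
+ *
+ * For pageset1 pages not flagged in pageset1_copy_map, the destination is
+ * looked up via copy_page_from_orig_page(). Pages whose io_map bit has
+ * already been cleared are discarded. Otherwise the buffer is copied into
+ * place, temporarily mapping the destination if it is not present in the
+ * kernel page tables, and the io_map bit is cleared.
+ */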
+static void use_read_page(unsigned long write_pfn, struct page *buffer)
+{
+ struct page *final_page = pfn_to_page(write_pfn),
+ *copy_page = final_page;
+ char *virt, *buffer_virt;
+ int was_present, cpu = smp_processor_id();
+ unsigned long idx = 0;
+
+ if (io_pageset == 1 && (!pageset1_copy_map ||
+ !memory_bm_test_bit(pageset1_copy_map, cpu, write_pfn))) {
+ int is_high = PageHighMem(final_page);
+ copy_page = copy_page_from_orig_page(is_high ? (void *) write_pfn : final_page, is_high);
+ }
+
+ if (!memory_bm_test_bit(io_map, cpu, write_pfn)) {
+ int test = !memory_bm_test_bit(io_map, cpu, write_pfn);
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Discard %ld (%d).", write_pfn, test);
+ mutex_lock(&io_mutex);
+ idx = atomic_add_return(1, &io_count);
+ mutex_unlock(&io_mutex);
+ return;
+ }
+
+ virt = kmap(copy_page);
+ buffer_virt = kmap(buffer);
+ was_present = kernel_page_present(copy_page);
+ if (!was_present)
+ kernel_map_pages(copy_page, 1, 1);
+ memcpy(virt, buffer_virt, PAGE_SIZE);
+ if (!was_present)
+ kernel_map_pages(copy_page, 1, 0);
+ kunmap(copy_page);
+ kunmap(buffer);
+ memory_bm_clear_bit(io_map, cpu, write_pfn);
+ TOI_TRACE_DEBUG(write_pfn, "_PS%d_read", io_pageset);
+}
+
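+/**
+ * status_update - refresh the user-visible progress display
+ * @writing: Non-zero when writing an image, zero when reading.
+ * @done: Pages completed so far.
+ * @ticks: Jiffies spent in the I/O loop so far.
+ *
+ * Combine the elapsed time with that recorded for earlier cycles to
+ * estimate the throughput and the seconds remaining, then pass the result
+ * to toi_update_status().
+ */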
+static unsigned long status_update(int writing, unsigned long done,
+ unsigned long ticks)
+{
+ int cs_index = writing ? 0 : 1;
+ unsigned long ticks_so_far = toi_bkd.toi_io_time[cs_index][1] + ticks;
+ unsigned long msec = jiffies_to_msecs(abs(ticks_so_far));
+ unsigned long pgs_per_s, estimate = 0, pages_left;
+
+ if (msec) {
+ pages_left = io_barmax - done;
+ pgs_per_s = 1000 * done / msec;
+ if (pgs_per_s)
+ estimate = DIV_ROUND_UP(pages_left, pgs_per_s);
+ }
+
+ if (estimate && ticks > HZ / 2)
+ return toi_update_status(done, io_barmax,
+ " %d/%d MB (%lu sec left)",
+ MB(done+1), MB(io_barmax), estimate);
+
+ return toi_update_status(done, io_barmax, " %d/%d MB",
+ MB(done+1), MB(io_barmax));
+}
+
+/**
+ * worker_rw_loop - main loop to read/write pages
+ *
+ * The main I/O loop for reading or writing pages. The io_map bitmap is used to
+ * track the pages to read/write.
+ * If we are reading, the pages are loaded to their final (mapped) pfn.
+ * Data is non-zero iff this is a thread started via toi_start_other_threads().
+ * In that case, we stay in here until told to quit.
+ **/
+static int worker_rw_loop(void *data)
+{
+ unsigned long data_pfn, write_pfn, next_jiffies = jiffies + HZ / 4,
+ jif_index = 1, start_time = jiffies, thread_num;
+ int result = 0, my_io_index = 0, last_worker;
+ struct page *buffer = toi_alloc_page(28, TOI_ATOMIC_GFP);
+ cpumask_var_t orig_mask;
+
+ if (!alloc_cpumask_var(&orig_mask, GFP_KERNEL)) {
+ printk(KERN_EMERG "Failed to allocate cpumask for TuxOnIce I/O thread %ld.\n", (unsigned long) data);
+ result = -ENOMEM;
+ goto out;
+ }
+
+ cpumask_copy(orig_mask, tsk_cpus_allowed(current));
+
+ current->flags |= PF_NOFREEZE;
+
+top:
+ mutex_lock(&io_mutex);
+ thread_num = atomic_read(&toi_io_workers);
+
+ cpumask_copy(tsk_cpus_allowed(current), orig_mask);
+ schedule();
+
+ atomic_inc(&toi_io_workers);
+
+ while (atomic_read(&io_count) >= atomic_read(&toi_io_workers) &&
+ !(io_write && test_result_state(TOI_ABORTED)) &&
+ toi_worker_command == TOI_IO_WORKER_RUN) {
+ if (!thread_num && jiffies > next_jiffies) {
+ next_jiffies += HZ / 4;
+ if (toiActiveAllocator->update_throughput_throttle)
+ toiActiveAllocator->update_throughput_throttle(
+ jif_index);
+ jif_index++;
+ }
+
+ /*
+ * What page to use? If reading, don't know yet which page's
+ * data will be read, so always use the buffer. If writing,
+ * use the copy (Pageset1) or original page (Pageset2), but
+ * always write the pfn of the original page.
+ */
+ if (io_write)
+ result = write_next_page(&data_pfn, &my_io_index,
+ &write_pfn);
+ else /* Reading */
+ result = read_next_page(&my_io_index, &write_pfn,
+ buffer);
+
+ if (result) {
+ mutex_lock(&io_mutex);
+ /* Nothing to do? */
+ if (result == -ENODATA) {
+ toi_message(TOI_IO, TOI_VERBOSE, 0,
+ "Thread %d has no more work.",
+ smp_processor_id());
+ break;
+ }
+
+ io_result = result;
+
+ if (io_write) {
+ printk(KERN_INFO "Write chunk returned %d.\n",
+ result);
+ abort_hibernate(TOI_FAILED_IO,
+ "Failed to write a chunk of the "
+ "image.");
+ break;
+ }
+
+ if (io_pageset == 1) {
+ printk(KERN_ERR "\nBreaking out of I/O loop "
+ "because of result code %d.\n", result);
+ break;
+ }
+ panic("Read chunk returned (%d)", result);
+ }
+
+ /*
+ * Discard reads of resaved pages while reading ps2
+ * and unwanted pages while rereading ps2 when aborting.
+ */
+ if (!io_write) {
+ if (!PageResave(pfn_to_page(write_pfn)))
+ use_read_page(write_pfn, buffer);
+ else {
+ mutex_lock(&io_mutex);
+ toi_message(TOI_IO, TOI_VERBOSE, 0,
+ "Resaved %ld.", write_pfn);
+ atomic_inc(&io_count);
+ mutex_unlock(&io_mutex);
+ }
+ }
+
+ if (!thread_num) {
+ if (my_io_index + io_base > io_nextupdate)
+ io_nextupdate = status_update(io_write,
+ my_io_index + io_base,
+ jiffies - start_time);
+
+ if (my_io_index > io_pc) {
+ printk(KERN_CONT "...%d%%", 20 * io_pc_step);
+ io_pc_step++;
+ io_pc = io_finish_at * io_pc_step / 5;
+ }
+ }
+
+ toi_cond_pause(0, NULL);
+
+ /*
+ * Subtle: If there's less I/O still to be done than threads
+ * running, quit. This stops us doing I/O beyond the end of
+ * the image when reading.
+ *
+ * Possible race condition. Two threads could do the test at
+ * the same time; one should exit and one should continue.
+ * Therefore we take the mutex before comparing and exiting.
+ */
+
+ mutex_lock(&io_mutex);
+ }
+
+ last_worker = atomic_dec_and_test(&toi_io_workers);
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "%d workers left.", atomic_read(&toi_io_workers));
+ mutex_unlock(&io_mutex);
+
+ if ((unsigned long) data && toi_worker_command != TOI_IO_WORKER_EXIT) {
+ /* Were we the last thread and we're using a flusher thread? */
+ if (last_worker && using_flusher) {
+ toiActiveAllocator->finish_all_io();
+ }
+ /* First, if we're doing I/O, wait for it to finish */
+ wait_event(toi_worker_wait_queue, toi_worker_command != TOI_IO_WORKER_RUN);
+ /* Then wait to be told what to do next */
+ wait_event(toi_worker_wait_queue, toi_worker_command != TOI_IO_WORKER_STOP);
+ if (toi_worker_command == TOI_IO_WORKER_RUN)
+ goto top;
+ }
+
+ if (thread_num)
+ atomic_dec(&toi_num_other_threads);
+
+out:
+ toi_message(TOI_IO, TOI_LOW, 0, "Thread %d exiting.", thread_num);
+ toi__free_page(28, buffer);
+ free_cpumask_var(orig_mask);
+
+ return result;
+}
+
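+/**
+ * toi_start_other_threads - start additional I/O worker threads
+ *
+ * Unless multithreaded I/O has been disabled, create and bind one ktoi_io
+ * kthread on each other online CPU, up to toi_max_workers - 1 threads when
+ * a limit is set. The workers park in worker_rw_loop() until
+ * toi_worker_command is set to TOI_IO_WORKER_RUN.
+ *
+ * Returns the number of threads started.
+ */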
+int toi_start_other_threads(void)
+{
+ int cpu;
+ struct task_struct *p;
+ int to_start = (toi_max_workers ? toi_max_workers : num_online_cpus()) - 1;
+ unsigned long num_started = 0;
+
+ if (test_action_state(TOI_NO_MULTITHREADED_IO))
+ return 0;
+
+ toi_worker_command = TOI_IO_WORKER_STOP;
+
+ for_each_online_cpu(cpu) {
+ if (num_started == to_start)
+ break;
+
+ if (cpu == smp_processor_id())
+ continue;
+
+ p = kthread_create_on_node(worker_rw_loop, (void *) num_started + 1,
+ cpu_to_node(cpu), "ktoi_io/%d", cpu);
+ if (IS_ERR(p)) {
+ printk(KERN_ERR "ktoi_io for %i failed\n", cpu);
+ continue;
+ }
+ kthread_bind(p, cpu);
+ p->flags |= PF_MEMALLOC;
+ wake_up_process(p);
+ num_started++;
+ atomic_inc(&toi_num_other_threads);
+ }
+
+ toi_message(TOI_IO, TOI_LOW, 0, "Started %d threads.", num_started);
+ return num_started;
+}
+
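+/**
+ * toi_stop_other_threads - tell the additional I/O workers to exit
+ *
+ * Set the worker command to TOI_IO_WORKER_EXIT and wake any thread waiting
+ * on toi_worker_wait_queue.
+ */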
+void toi_stop_other_threads(void)
+{
+ toi_message(TOI_IO, TOI_LOW, 0, "Stopping other threads.");
+ toi_worker_command = TOI_IO_WORKER_EXIT;
+ wake_up(&toi_worker_wait_queue);
+}
+
+/**
+ * do_rw_loop - main highlevel function for reading or writing pages
+ *
+ * Create the io_map bitmap and call worker_rw_loop to perform I/O operations.
+ **/
+static int do_rw_loop(int write, int finish_at, struct memory_bitmap *pageflags,
+ int base, int barmax, int pageset)
+{
+ int index = 0, cpu, result = 0, workers_started;
+ unsigned long pfn, next;
+
+ first_filter = toi_get_next_filter(NULL);
+
+ if (!finish_at)
+ return 0;
+
+ io_write = write;
+ io_finish_at = finish_at;
+ io_base = base;
+ io_barmax = barmax;
+ io_pageset = pageset;
+ io_index = 0;
+ io_pc = io_finish_at / 5;
+ io_pc_step = 1;
+ io_result = 0;
+ io_nextupdate = base + 1;
+ toi_bio_queue_flusher_should_finish = 0;
+
+ for_each_online_cpu(cpu) {
+ per_cpu(last_sought, cpu) = NULL;
+ per_cpu(last_low_page, cpu) = NULL;
+ per_cpu(last_high_page, cpu) = NULL;
+ }
+
+ /* Ensure all bits clear */
+ memory_bm_clear(io_map);
+
+ memory_bm_position_reset(io_map);
+ next = memory_bm_next_pfn(io_map, 0);
+
+ BUG_ON(next != BM_END_OF_MAP);
+
+ /* Set the bits for the pages to write */
+ memory_bm_position_reset(pageflags);
+
+ pfn = memory_bm_next_pfn(pageflags, 0);
+ toi_trace_index++;
+
+ while (pfn != BM_END_OF_MAP && index < finish_at) {
+ TOI_TRACE_DEBUG(pfn, "_io_pageset_%d (%d/%d)", pageset, index + 1, finish_at);
+ memory_bm_set_bit(io_map, 0, pfn);
+ pfn = memory_bm_next_pfn(pageflags, 0);
+ index++;
+ }
+
+ BUG_ON(next != BM_END_OF_MAP || index < finish_at);
+
+ memory_bm_position_reset(io_map);
+ toi_trace_index++;
+
+ atomic_set(&io_count, finish_at);
+
+ memory_bm_position_reset(pageset1_map);
+
+ mutex_lock(&io_mutex);
+
+ clear_toi_state(TOI_IO_STOPPED);
+
+ using_flusher = (atomic_read(&toi_num_other_threads) &&
+ toiActiveAllocator->io_flusher &&
+ !test_action_state(TOI_NO_FLUSHER_THREAD));
+
+ workers_started = atomic_read(&toi_num_other_threads);
+
+ memory_bm_position_reset(io_map);
+ memory_bm_position_reset(pageset1_copy_map);
+
+ toi_worker_command = TOI_IO_WORKER_RUN;
+ wake_up(&toi_worker_wait_queue);
+
+ mutex_unlock(&io_mutex);
+
+ if (using_flusher)
+ result = toiActiveAllocator->io_flusher(write);
+ else
+ worker_rw_loop(NULL);
+
+ while (atomic_read(&toi_io_workers))
+ schedule();
+
+ printk(KERN_CONT "\n");
+
+ toi_worker_command = TOI_IO_WORKER_STOP;
+ wake_up(&toi_worker_wait_queue);
+
+ if (unlikely(test_toi_state(TOI_STOP_RESUME))) {
+ if (!atomic_read(&toi_io_workers)) {
+ rw_cleanup_modules(READ);
+ set_toi_state(TOI_IO_STOPPED);
+ }
+ while (1)
+ schedule();
+ }
+ set_toi_state(TOI_IO_STOPPED);
+
+ if (!io_result && !result && !test_result_state(TOI_ABORTED)) {
+ unsigned long next;
+
+ toi_update_status(io_base + io_finish_at, io_barmax,
+ " %d/%d MB ",
+ MB(io_base + io_finish_at), MB(io_barmax));
+
+ memory_bm_position_reset(io_map);
+ next = memory_bm_next_pfn(io_map, 0);
+ if (next != BM_END_OF_MAP) {
+ printk(KERN_INFO "Finished I/O loop but still work to "
+ "do?\nFinish at = %d. io_count = %d.\n",
+ finish_at, atomic_read(&io_count));
+ printk(KERN_INFO "I/O bitmap still records work to do."
+ "%ld.\n", next);
+ BUG();
+ do {
+ cpu_relax();
+ } while (0);
+ }
+ }
+
+ return io_result ? io_result : result;
+}
+
+/**
+ * write_pageset - write a pageset to disk.
+ * @pagedir: Which pagedir to write.
+ *
+ * Returns:
+ * Zero on success or -1 on failure.
+ **/
+int write_pageset(struct pagedir *pagedir)
+{
+ int finish_at, base = 0;
+ int barmax = pagedir1.size + pagedir2.size;
+ long error = 0;
+ struct memory_bitmap *pageflags;
+ unsigned long start_time, end_time;
+
+ /*
+ * Even if there is nothing to read or write, the allocator
+ * may need the init/cleanup for its housekeeping (e.g.
+ * Pageset1 may start where pageset2 ends when writing).
+ */
+ finish_at = pagedir->size;
+
+ if (pagedir->id == 1) {
+ toi_prepare_status(DONT_CLEAR_BAR,
+ "Writing kernel & process data...");
+ base = pagedir2.size;
+ if (test_action_state(TOI_TEST_FILTER_SPEED) ||
+ test_action_state(TOI_TEST_BIO))
+ pageflags = pageset1_map;
+ else
+ pageflags = pageset1_copy_map;
+ } else {
+ toi_prepare_status(DONT_CLEAR_BAR, "Writing caches...");
+ pageflags = pageset2_map;
+ }
+
+ start_time = jiffies;
+
+ if (rw_init_modules(WRITE, pagedir->id)) {
+ abort_hibernate(TOI_FAILED_MODULE_INIT,
+ "Failed to initialise modules for writing.");
+ error = 1;
+ }
+
+ if (!error)
+ error = do_rw_loop(WRITE, finish_at, pageflags, base, barmax,
+ pagedir->id);
+
+ if (rw_cleanup_modules(WRITE) && !error) {
+ abort_hibernate(TOI_FAILED_MODULE_CLEANUP,
+ "Failed to cleanup after writing.");
+ error = 1;
+ }
+
+ end_time = jiffies;
+
+ if ((end_time - start_time) && (!test_result_state(TOI_ABORTED))) {
+ toi_bkd.toi_io_time[0][0] += finish_at,
+ toi_bkd.toi_io_time[0][1] += (end_time - start_time);
+ }
+
+ return error;
+}
+
+/**
+ * read_pageset - highlevel function to read a pageset from disk
+ * @pagedir: pageset to read
+ * @overwrittenpagesonly: Whether to read the whole pageset or
+ * only part of it.
+ *
+ * Returns:
+ * Zero on success or -1 on failure.
+ **/
+static int read_pageset(struct pagedir *pagedir, int overwrittenpagesonly)
+{
+ int result = 0, base = 0;
+ int finish_at = pagedir->size;
+ int barmax = pagedir1.size + pagedir2.size;
+ struct memory_bitmap *pageflags;
+ unsigned long start_time, end_time;
+
+ if (pagedir->id == 1) {
+ toi_prepare_status(DONT_CLEAR_BAR,
+ "Reading kernel & process data...");
+ pageflags = pageset1_map;
+ } else {
+ toi_prepare_status(DONT_CLEAR_BAR, "Reading caches...");
+ if (overwrittenpagesonly) {
+ barmax = min(pagedir1.size, pagedir2.size);
+ finish_at = min(pagedir1.size, pagedir2.size);
+ } else
+ base = pagedir1.size;
+ pageflags = pageset2_map;
+ }
+
+ start_time = jiffies;
+
+ if (rw_init_modules(READ, pagedir->id)) {
+ toiActiveAllocator->remove_image();
+ result = 1;
+ } else
+ result = do_rw_loop(READ, finish_at, pageflags, base, barmax,
+ pagedir->id);
+
+ if (rw_cleanup_modules(READ) && !result) {
+ abort_hibernate(TOI_FAILED_MODULE_CLEANUP,
+ "Failed to cleanup after reading.");
+ result = 1;
+ }
+
+ /* Statistics */
+ end_time = jiffies;
+
+ if ((end_time - start_time) && (!test_result_state(TOI_ABORTED))) {
+ toi_bkd.toi_io_time[1][0] += finish_at,
+ toi_bkd.toi_io_time[1][1] += (end_time - start_time);
+ }
+
+ return result;
+}
+
+/**
+ * write_module_configs - store the modules configuration
+ *
+ * The configuration for each module is stored in the image header.
+ * Returns: Int
+ * Zero on success, Error value otherwise.
+ **/
+static int write_module_configs(void)
+{
+ struct toi_module_ops *this_module;
+ char *buffer = (char *) toi_get_zeroed_page(22, TOI_ATOMIC_GFP);
+ int len, index = 1;
+ struct toi_module_header toi_module_header;
+
+ if (!buffer) {
+ printk(KERN_INFO "Failed to allocate a buffer for saving "
+ "module configuration info.\n");
+ return -ENOMEM;
+ }
+
+ /*
+ * We have to know which data goes with which module, so we at
+ * least write a length of zero for a module. Note that we are
+ * also assuming every module's config data takes <= PAGE_SIZE.
+ */
+
+ /* For each module (in registration order) */
+ list_for_each_entry(this_module, &toi_modules, module_list) {
+ if (!this_module->enabled || !this_module->storage_needed ||
+ (this_module->type == WRITER_MODULE &&
+ toiActiveAllocator != this_module))
+ continue;
+
+ /* Get the data from the module */
+ len = 0;
+ if (this_module->save_config_info)
+ len = this_module->save_config_info(buffer);
+
+ /* Save the details of the module */
+ toi_module_header.enabled = this_module->enabled;
+ toi_module_header.type = this_module->type;
+ toi_module_header.index = index++;
+ strncpy(toi_module_header.name, this_module->name,
+ sizeof(toi_module_header.name));
+ toiActiveAllocator->rw_header_chunk(WRITE,
+ this_module,
+ (char *) &toi_module_header,
+ sizeof(toi_module_header));
+
+ /* Save the size of the data and any data returned */
+ toiActiveAllocator->rw_header_chunk(WRITE,
+ this_module,
+ (char *) &len, sizeof(int));
+ if (len)
+ toiActiveAllocator->rw_header_chunk(
+ WRITE, this_module, buffer, len);
+ }
+
+ /* Write a blank header to terminate the list */
+ toi_module_header.name[0] = '\0';
+ toiActiveAllocator->rw_header_chunk(WRITE, NULL,
+ (char *) &toi_module_header, sizeof(toi_module_header));
+
+ toi_free_page(22, (unsigned long) buffer);
+ return 0;
+}
+
+/**
+ * read_one_module_config - read and configure one module
+ *
+ * Read the configuration for one module, and configure the module
+ * to match if it is loaded.
+ *
+ * Returns: Int
+ * Zero on success, Error value otherwise.
+ **/
+static int read_one_module_config(struct toi_module_header *header)
+{
+ struct toi_module_ops *this_module;
+ int result, len;
+ char *buffer;
+
+ /* Find the module */
+ this_module = toi_find_module_given_name(header->name);
+
+ if (!this_module) {
+ if (header->enabled) {
+ toi_early_boot_message(1, TOI_CONTINUE_REQ,
+ "It looks like we need module %s for reading "
+ "the image but it hasn't been registered.\n",
+ header->name);
+ if (!(test_toi_state(TOI_CONTINUE_REQ)))
+ return -EINVAL;
+ } else
+ printk(KERN_INFO "Module %s configuration data found, "
+ "but the module hasn't registered. Looks like "
+ "it was disabled, so we're ignoring its data.\n",
+ header->name);
+ }
+
+ /* Get the length of the data (if any) */
+ result = toiActiveAllocator->rw_header_chunk(READ, NULL, (char *) &len,
+ sizeof(int));
+ if (result) {
+ printk(KERN_ERR "Failed to read the length of the module %s's"
+ " configuration data.\n",
+ header->name);
+ return -EINVAL;
+ }
+
+ /* Read any data and pass to the module (if we found one) */
+ if (!len)
+ return 0;
+
+ buffer = (char *) toi_get_zeroed_page(23, TOI_ATOMIC_GFP);
+
+ if (!buffer) {
+ printk(KERN_ERR "Failed to allocate a buffer for reloading "
+ "module configuration info.\n");
+ return -ENOMEM;
+ }
+
+ toiActiveAllocator->rw_header_chunk(READ, NULL, buffer, len);
+
+ if (!this_module)
+ goto out;
+
+ if (!this_module->save_config_info)
+ printk(KERN_ERR "Huh? Module %s appears to have a "
+ "save_config_info, but not a load_config_info "
+ "function!\n", this_module->name);
+ else
+ this_module->load_config_info(buffer, len);
+
+ /*
+ * Now move this module to the tail of its lists. This will put it in
+ * order. Any new modules will end up at the top of the lists. They
+ * should have been set to disabled when loaded (people will
+ * normally not edit an initrd to load a new module and then hibernate
+ * without using it!).
+ */
+
+ toi_move_module_tail(this_module);
+
+ this_module->enabled = header->enabled;
+
+out:
+ toi_free_page(23, (unsigned long) buffer);
+ return 0;
+}
+
+/**
+ * read_module_configs - reload module configurations from the image header.
+ *
+ * Returns: Int
+ * Zero on success or an error code.
+ **/
+static int read_module_configs(void)
+{
+ int result = 0;
+ struct toi_module_header toi_module_header;
+ struct toi_module_ops *this_module;
+
+ /* All modules are initially disabled. That way, if we have a module
+ * loaded now that wasn't loaded when we hibernated, it won't be used
+ * in trying to read the data.
+ */
+ list_for_each_entry(this_module, &toi_modules, module_list)
+ this_module->enabled = 0;
+
+ /* Get the first module header */
+ result = toiActiveAllocator->rw_header_chunk(READ, NULL,
+ (char *) &toi_module_header,
+ sizeof(toi_module_header));
+ if (result) {
+ printk(KERN_ERR "Failed to read the next module header.\n");
+ return -EINVAL;
+ }
+
+ /* For each module (in registration order) */
+ while (toi_module_header.name[0]) {
+ result = read_one_module_config(&toi_module_header);
+
+ if (result)
+ return -EINVAL;
+
+ /* Get the next module header */
+ result = toiActiveAllocator->rw_header_chunk(READ, NULL,
+ (char *) &toi_module_header,
+ sizeof(toi_module_header));
+
+ if (result) {
+ printk(KERN_ERR "Failed to read the next module "
+ "header.\n");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
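+/* Should this superblock's fs_info be recorded in the image header? */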
+static inline int save_fs_info(struct fs_info *fs, struct block_device *bdev)
+{
+ return (!fs || IS_ERR(fs) || !fs->last_mount_size) ? 0 : 1;
+}
+
+int fs_info_space_needed(void)
+{
+ const struct super_block *sb;
+ int result = sizeof(int);
+
+ list_for_each_entry(sb, &super_blocks, s_list) {
+ struct fs_info *fs;
+
+ if (!sb->s_bdev)
+ continue;
+
+ fs = fs_info_from_block_dev(sb->s_bdev);
+ if (save_fs_info(fs, sb->s_bdev))
+ result += 16 + sizeof(dev_t) + sizeof(int) +
+ fs->last_mount_size;
+ free_fs_info(fs);
+ }
+ return result;
+}
+
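+/* Count the device-backed filesystems whose fs_info will be saved. */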
+static int fs_info_num_to_save(void)
+{
+ const struct super_block *sb;
+ int to_save = 0;
+
+ list_for_each_entry(sb, &super_blocks, s_list) {
+ struct fs_info *fs;
+
+ if (!sb->s_bdev)
+ continue;
+
+ fs = fs_info_from_block_dev(sb->s_bdev);
+ if (save_fs_info(fs, sb->s_bdev))
+ to_save++;
+ free_fs_info(fs);
+ }
+
+ return to_save;
+}
+
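+/**
+ * fs_info_save - record filesystem mount information in the image header
+ *
+ * Write the number of filesystems being recorded, then for each one its
+ * uuid, dev_t, last-mount record length and last-mount record, so that
+ * resume can refuse to load the image if a filesystem has been mounted
+ * read-write in the meantime.
+ */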
+static int fs_info_save(void)
+{
+ const struct super_block *sb;
+ int to_save = fs_info_num_to_save();
+
+ if (toiActiveAllocator->rw_header_chunk(WRITE, NULL, (char *) &to_save,
+ sizeof(int))) {
+ abort_hibernate(TOI_FAILED_IO, "Failed to write num fs_info"
+ " to save.");
+ return -EIO;
+ }
+
+ list_for_each_entry(sb, &super_blocks, s_list) {
+ struct fs_info *fs;
+
+ if (!sb->s_bdev)
+ continue;
+
+ fs = fs_info_from_block_dev(sb->s_bdev);
+ if (save_fs_info(fs, sb->s_bdev)) {
+ if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
+ &fs->uuid[0], 16)) {
+ abort_hibernate(TOI_FAILED_IO, "Failed to "
+ "write uuid.");
+ return -EIO;
+ }
+ if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
+ (char *) &fs->dev_t, sizeof(dev_t))) {
+ abort_hibernate(TOI_FAILED_IO, "Failed to "
+ "write dev_t.");
+ return -EIO;
+ }
+ if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
+ (char *) &fs->last_mount_size, sizeof(int))) {
+ abort_hibernate(TOI_FAILED_IO, "Failed to "
+ "write last mount length.");
+ return -EIO;
+ }
+ if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
+ fs->last_mount, fs->last_mount_size)) {
+ abort_hibernate(TOI_FAILED_IO, "Failed to "
+ "write uuid.");
+ return -EIO;
+ }
+ }
+ free_fs_info(fs);
+ }
+ return 0;
+}
+
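+/**
+ * fs_info_load_and_check_one - verify one filesystem recorded in the header
+ *
+ * Read back a uuid, a dev_t (for header version >= 3) and the last-mount
+ * record, locate the matching block device and compare its current
+ * last-mount data with what was saved. Returns non-zero if they differ or
+ * on an error.
+ */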
+static int fs_info_load_and_check_one(void)
+{
+ char uuid[16], *last_mount;
+ int result = 0, ln;
+ dev_t dev_t;
+ struct block_device *dev;
+ struct fs_info *fs_info, seek;
+
+ if (toiActiveAllocator->rw_header_chunk(READ, NULL, uuid, 16)) {
+ abort_hibernate(TOI_FAILED_IO, "Failed to read uuid.");
+ return -EIO;
+ }
+
+ read_if_version(3, dev_t, "uuid dev_t field", return -EIO);
+
+ if (toiActiveAllocator->rw_header_chunk(READ, NULL, (char *) &ln,
+ sizeof(int))) {
+ abort_hibernate(TOI_FAILED_IO,
+ "Failed to read last mount size.");
+ return -EIO;
+ }
+
+ last_mount = kzalloc(ln, GFP_KERNEL);
+
+ if (!last_mount)
+ return -ENOMEM;
+
+ if (toiActiveAllocator->rw_header_chunk(READ, NULL, last_mount, ln)) {
+ abort_hibernate(TOI_FAILED_IO,
+ "Failed to read last mount timestamp.");
+ result = -EIO;
+ goto out_lmt;
+ }
+
+ strncpy((char *) &seek.uuid, uuid, 16);
+ seek.dev_t = dev_t;
+ seek.last_mount_size = ln;
+ seek.last_mount = last_mount;
+ dev_t = blk_lookup_fs_info(&seek);
+ if (!dev_t)
+ goto out_lmt;
+
+ dev = toi_open_by_devnum(dev_t);
+
+ fs_info = fs_info_from_block_dev(dev);
+ if (fs_info && !IS_ERR(fs_info)) {
+ if (ln != fs_info->last_mount_size) {
+ printk(KERN_EMERG "Found matching uuid but last mount "
+ "time lengths differ?! "
+ "(%d vs %d).\n", ln,
+ fs_info->last_mount_size);
+ result = -EINVAL;
+ } else {
+ char buf[BDEVNAME_SIZE];
+ result = !!memcmp(fs_info->last_mount, last_mount, ln);
+ if (result)
+ printk(KERN_EMERG "Last mount time for %s has "
+ "changed!\n", bdevname(dev, buf));
+ }
+ }
+ toi_close_bdev(dev);
+ free_fs_info(fs_info);
+out_lmt:
+ kfree(last_mount);
+ return result;
+}
+
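+/* Read the saved filesystem count and check each recorded entry in turn. */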
+static int fs_info_load_and_check(void)
+{
+ int to_do, result = 0;
+
+ if (toiActiveAllocator->rw_header_chunk(READ, NULL, (char *) &to_do,
+ sizeof(int))) {
+ abort_hibernate(TOI_FAILED_IO, "Failed to read num fs_info "
+ "to load.");
+ return -EIO;
+ }
+
+ while (to_do--)
+ result |= fs_info_load_and_check_one();
+
+ return result;
+}
+
+/**
+ * write_image_header - write the image header after writing the image proper
+ *
+ * Returns: Int
+ * Zero on success, error value otherwise.
+ **/
+int write_image_header(void)
+{
+ int ret;
+ int total = pagedir1.size + pagedir2.size+2;
+ char *header_buffer = NULL;
+
+ /* Now prepare to write the header */
+ ret = toiActiveAllocator->write_header_init();
+ if (ret) {
+ abort_hibernate(TOI_FAILED_MODULE_INIT,
+ "Active allocator's write_header_init"
+ " function failed.");
+ goto write_image_header_abort;
+ }
+
+ /* Get a buffer */
+ header_buffer = (char *) toi_get_zeroed_page(24, TOI_ATOMIC_GFP);
+ if (!header_buffer) {
+ abort_hibernate(TOI_OUT_OF_MEMORY,
+ "Out of memory when trying to get page for header!");
+ goto write_image_header_abort;
+ }
+
+ /* Write hibernate header */
+ if (fill_toi_header((struct toi_header *) header_buffer)) {
+ abort_hibernate(TOI_OUT_OF_MEMORY,
+ "Failure to fill header information!");
+ goto write_image_header_abort;
+ }
+
+ if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
+ header_buffer, sizeof(struct toi_header))) {
+ abort_hibernate(TOI_OUT_OF_MEMORY,
+ "Failure to write header info.");
+ goto write_image_header_abort;
+ }
+
+ if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
+ (char *) &toi_max_workers, sizeof(toi_max_workers))) {
+ abort_hibernate(TOI_OUT_OF_MEMORY,
+ "Failure to write the number of workers to use.");
+ goto write_image_header_abort;
+ }
+
+ /* Write filesystem info */
+ if (fs_info_save())
+ goto write_image_header_abort;
+
+ /* Write module configurations */
+ ret = write_module_configs();
+ if (ret) {
+ abort_hibernate(TOI_FAILED_IO,
+ "Failed to write module configs.");
+ goto write_image_header_abort;
+ }
+
+ if (memory_bm_write(pageset1_map,
+ toiActiveAllocator->rw_header_chunk)) {
+ abort_hibernate(TOI_FAILED_IO,
+ "Failed to write bitmaps.");
+ goto write_image_header_abort;
+ }
+
+ /* Flush data and let allocator cleanup */
+ if (toiActiveAllocator->write_header_cleanup()) {
+ abort_hibernate(TOI_FAILED_IO,
+ "Failed to cleanup writing header.");
+ goto write_image_header_abort_no_cleanup;
+ }
+
+ if (test_result_state(TOI_ABORTED))
+ goto write_image_header_abort_no_cleanup;
+
+ toi_update_status(total, total, NULL);
+
+out:
+ if (header_buffer)
+ toi_free_page(24, (unsigned long) header_buffer);
+ return ret;
+
+write_image_header_abort:
+ toiActiveAllocator->write_header_cleanup();
+write_image_header_abort_no_cleanup:
+ ret = -1;
+ goto out;
+}
+
+/**
+ * sanity_check - check the header
+ * @sh: the header which was saved at hibernate time.
+ *
+ * Perform a few checks, seeking to ensure that the kernel being
+ * booted matches the one hibernated. They need to match so we can
+ * be _sure_ things will work. It is not absolutely impossible for
+ * resuming from a different kernel to work, just not assured.
+ **/
+static char *sanity_check(struct toi_header *sh)
+{
+ char *reason = check_image_kernel((struct swsusp_info *) sh);
+
+ if (reason)
+ return reason;
+
+ if (!test_action_state(TOI_IGNORE_ROOTFS)) {
+ const struct super_block *sb;
+ list_for_each_entry(sb, &super_blocks, s_list) {
+ if ((!(sb->s_flags & MS_RDONLY)) &&
+ (sb->s_type->fs_flags & FS_REQUIRES_DEV))
+ return "Device backed fs has been mounted "
+ "rw prior to resume or initrd/ramfs "
+ "is mounted rw.";
+ }
+ }
+
+ return NULL;
+}
+
+static DECLARE_WAIT_QUEUE_HEAD(freeze_wait);
+
+#define FREEZE_IN_PROGRESS (~0)
+
+static int freeze_result;
+
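+/*
+ * Freeze processes from a workqueue so that __read_pageset1() can carry on
+ * reading the image while tasks are being frozen; freeze_result is set and
+ * waiters on freeze_wait are woken once the freeze completes.
+ */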
+static void do_freeze(struct work_struct *dummy)
+{
+ freeze_result = freeze_processes();
+ wake_up(&freeze_wait);
+ trap_non_toi_io = 1;
+}
+
+static DECLARE_WORK(freeze_work, do_freeze);
+
+/**
+ * __read_pageset1 - test for the existence of an image and attempt to load it
+ *
+ * Returns: Int
+ * Zero if image found and pageset1 successfully loaded.
+ * Error if no image found or loaded.
+ **/
+static int __read_pageset1(void)
+{
+ int i, result = 0;
+ char *header_buffer = (char *) toi_get_zeroed_page(25, TOI_ATOMIC_GFP),
+ *sanity_error = NULL;
+ struct toi_header *toi_header;
+
+ if (!header_buffer) {
+ printk(KERN_INFO "Unable to allocate a page for reading the "
+ "signature.\n");
+ return -ENOMEM;
+ }
+
+ /* Check for an image */
+ result = toiActiveAllocator->image_exists(1);
+ if (result == 3) {
+ result = -ENODATA;
+ toi_early_boot_message(1, 0, "The signature from an older "
+ "version of TuxOnIce has been detected.");
+ goto out_remove_image;
+ }
+
+ if (result != 1) {
+ result = -ENODATA;
+ noresume_reset_modules();
+ printk(KERN_INFO "TuxOnIce: No image found.\n");
+ goto out;
+ }
+
+ /*
+ * Prepare the active allocator for reading the image header. The
+ * active allocator might read its own configuration.
+ *
+ * NB: This call may never return because there might be a signature
+ * for a different image such that we warn the user and they choose
+ * to reboot. (For instance, the device ids may look erroneous (2.4 vs
+ * 2.6), or the image may be unavailable because it was stored over a
+ * network connection.)
+ */
+
+ result = toiActiveAllocator->read_header_init();
+ if (result) {
+ printk(KERN_INFO "TuxOnIce: Failed to initialise reading the "
+ "image header.\n");
+ goto out_remove_image;
+ }
+
+ /* Check for noresume command line option */
+ if (test_toi_state(TOI_NORESUME_SPECIFIED)) {
+ printk(KERN_INFO "TuxOnIce: Noresume on command line. Removed "
+ "image.\n");
+ goto out_remove_image;
+ }
+
+ /* Check whether we've resumed before */
+ if (test_toi_state(TOI_RESUMED_BEFORE)) {
+ toi_early_boot_message(1, 0, NULL);
+ if (!(test_toi_state(TOI_CONTINUE_REQ))) {
+ printk(KERN_INFO "TuxOnIce: Tried to resume before: "
+ "Invalidated image.\n");
+ goto out_remove_image;
+ }
+ }
+
+ clear_toi_state(TOI_CONTINUE_REQ);
+
+ toi_image_header_version = toiActiveAllocator->get_header_version();
+
+ if (unlikely(toi_image_header_version > TOI_HEADER_VERSION)) {
+ toi_early_boot_message(1, 0, image_version_error);
+ if (!(test_toi_state(TOI_CONTINUE_REQ))) {
+ printk(KERN_INFO "TuxOnIce: Header version too new: "
+ "Invalidated image.\n");
+ goto out_remove_image;
+ }
+ }
+
+ /* Read hibernate header */
+ result = toiActiveAllocator->rw_header_chunk(READ, NULL,
+ header_buffer, sizeof(struct toi_header));
+ if (result < 0) {
+ printk(KERN_ERR "TuxOnIce: Failed to read the image "
+ "signature.\n");
+ goto out_remove_image;
+ }
+
+ toi_header = (struct toi_header *) header_buffer;
+
+ /*
+ * NB: This call may also result in a reboot rather than returning.
+ */
+
+ sanity_error = sanity_check(toi_header);
+ if (sanity_error) {
+ toi_early_boot_message(1, TOI_CONTINUE_REQ,
+ sanity_error);
+ printk(KERN_INFO "TuxOnIce: Sanity check failed.\n");
+ goto out_remove_image;
+ }
+
+ /*
+ * We have an image and it looks like it will load okay.
+ *
+ * Get metadata from header. Don't override commandline parameters.
+ *
+ * We don't need to save the image size limit because it's not used
+ * during resume and will be restored with the image anyway.
+ */
+
+ memcpy((char *) &pagedir1,
+ (char *) &toi_header->pagedir, sizeof(pagedir1));
+ toi_result = toi_header->param0;
+ if (!toi_bkd.toi_debug_state) {
+ toi_bkd.toi_action =
+ (toi_header->param1 & ~toi_bootflags_mask) |
+ (toi_bkd.toi_action & toi_bootflags_mask);
+ toi_bkd.toi_debug_state = toi_header->param2;
+ toi_bkd.toi_default_console_level = toi_header->param3;
+ }
+ clear_toi_state(TOI_IGNORE_LOGLEVEL);
+ pagedir2.size = toi_header->pageset_2_size;
+ for (i = 0; i < 4; i++)
+ toi_bkd.toi_io_time[i/2][i%2] =
+ toi_header->io_time[i/2][i%2];
+
+ set_toi_state(TOI_BOOT_KERNEL);
+ boot_kernel_data_buffer = toi_header->bkd;
+
+ read_if_version(1, toi_max_workers, "TuxOnIce max workers",
+ goto out_remove_image);
+
+ /* Read filesystem info */
+ if (fs_info_load_and_check()) {
+ printk(KERN_EMERG "TuxOnIce: File system mount time checks "
+ "failed. Refusing to corrupt your filesystems!\n");
+ goto out_remove_image;
+ }
+
+ /* Read module configurations */
+ result = read_module_configs();
+ if (result) {
+ pagedir1.size = 0;
+ pagedir2.size = 0;
+ printk(KERN_INFO "TuxOnIce: Failed to read TuxOnIce module "
+ "configurations.\n");
+ clear_action_state(TOI_KEEP_IMAGE);
+ goto out_remove_image;
+ }
+
+ toi_prepare_console();
+
+ set_toi_state(TOI_NOW_RESUMING);
+
+ result = pm_notifier_call_chain(PM_RESTORE_PREPARE);
+ if (result)
+ goto out_notifier_call_chain;
+
+ if (usermodehelper_disable())
+ goto out_enable_usermodehelper;
+
+ current->flags |= PF_NOFREEZE;
+ freeze_result = FREEZE_IN_PROGRESS;
+
+ schedule_work_on(cpumask_first(cpu_online_mask), &freeze_work);
+
+ toi_cond_pause(1, "About to read original pageset1 locations.");
+
+ /*
+ * See _toi_rw_header_chunk in tuxonice_bio.c:
+ * Initialize pageset1_map by reading the map from the image.
+ */
+ if (memory_bm_read(pageset1_map, toiActiveAllocator->rw_header_chunk))
+ goto out_thaw;
+
+ /*
+ * See toi_rw_cleanup in tuxonice_bio.c:
+ * Clean up after reading the header.
+ */
+ result = toiActiveAllocator->read_header_cleanup();
+ if (result) {
+ printk(KERN_ERR "TuxOnIce: Failed to cleanup after reading the "
+ "image header.\n");
+ goto out_thaw;
+ }
+
+ toi_cond_pause(1, "About to read pagedir.");
+
+ /*
+ * Get the addresses of pages into which we will load the kernel to
+ * be copied back and check if they conflict with the ones we are using.
+ */
+ if (toi_get_pageset1_load_addresses()) {
+ printk(KERN_INFO "TuxOnIce: Failed to get load addresses for "
+ "pageset1.\n");
+ goto out_thaw;
+ }
+
+ /* Read the original kernel back */
+ toi_cond_pause(1, "About to read pageset 1.");
+
+ /* Given the pagemap, read back the data from disk */
+ if (read_pageset(&pagedir1, 0)) {
+ toi_prepare_status(DONT_CLEAR_BAR, "Failed to read pageset 1.");
+ result = -EIO;
+ goto out_thaw;
+ }
+
+ toi_cond_pause(1, "About to restore original kernel.");
+ result = 0;
+
+ if (!toi_keeping_image &&
+ toiActiveAllocator->mark_resume_attempted)
+ toiActiveAllocator->mark_resume_attempted(1);
+
+ wait_event(freeze_wait, freeze_result != FREEZE_IN_PROGRESS);
+out:
+ current->flags &= ~PF_NOFREEZE;
+ toi_free_page(25, (unsigned long) header_buffer);
+ return result;
+
+out_thaw:
+ wait_event(freeze_wait, freeze_result != FREEZE_IN_PROGRESS);
+ trap_non_toi_io = 0;
+ thaw_processes();
+out_enable_usermodehelper:
+ usermodehelper_enable();
+out_notifier_call_chain:
+ pm_notifier_call_chain(PM_POST_RESTORE);
+ toi_cleanup_console();
+out_remove_image:
+ result = -EINVAL;
+ if (!toi_keeping_image)
+ toiActiveAllocator->remove_image();
+ toiActiveAllocator->read_header_cleanup();
+ noresume_reset_modules();
+ goto out;
+}
+
+/**
+ * read_pageset1 - highlevel function to read the saved pages
+ *
+ * Attempt to read the header and pageset1 of a hibernate image.
+ * Handle the outcome, complaining where appropriate.
+ **/
+int read_pageset1(void)
+{
+ int error;
+
+ error = __read_pageset1();
+
+ if (error && error != -ENODATA && error != -EINVAL &&
+ !test_result_state(TOI_ABORTED))
+ abort_hibernate(TOI_IMAGE_ERROR,
+ "TuxOnIce: Error %d resuming\n", error);
+
+ return error;
+}
+
+/**
+ * get_have_image_data - check the image header
+ **/
+static char *get_have_image_data(void)
+{
+ char *output_buffer = (char *) toi_get_zeroed_page(26, TOI_ATOMIC_GFP);
+ struct toi_header *toi_header;
+
+ if (!output_buffer) {
+ printk(KERN_INFO "Output buffer null.\n");
+ return NULL;
+ }
+
+ /* Check for an image */
+ if (!toiActiveAllocator->image_exists(1) ||
+ toiActiveAllocator->read_header_init() ||
+ toiActiveAllocator->rw_header_chunk(READ, NULL,
+ output_buffer, sizeof(struct toi_header))) {
+ sprintf(output_buffer, "0\n");
+ /*
+ * From an initrd/ramfs, catting have_image and
+ * getting a result of 0 is sufficient.
+ */
+ clear_toi_state(TOI_BOOT_TIME);
+ goto out;
+ }
+
+ toi_header = (struct toi_header *) output_buffer;
+
+ sprintf(output_buffer, "1\n%s\n%s\n",
+ toi_header->uts.machine,
+ toi_header->uts.version);
+
+ /* Check whether we've resumed before */
+ if (test_toi_state(TOI_RESUMED_BEFORE))
+ strcat(output_buffer, "Resumed before.\n");
+
+out:
+ noresume_reset_modules();
+ return output_buffer;
+}
+
+/**
+ * read_pageset2 - read second part of the image
+ * @overwrittenpagesonly: Read only pages which would have been
+ * overwritten by pageset1?
+ *
+ * Read in part or all of pageset2 of an image, depending upon
+ * whether we are hibernating and have only overwritten a portion
+ * with pageset1 pages, or are resuming and need to read them
+ * all.
+ *
+ * Returns: Int
+ * Zero if no error, otherwise the error value.
+ **/
+int read_pageset2(int overwrittenpagesonly)
+{
+ int result = 0;
+
+ if (!pagedir2.size)
+ return 0;
+
+ result = read_pageset(&pagedir2, overwrittenpagesonly);
+
+ toi_cond_pause(1, "Pagedir 2 read.");
+
+ return result;
+}
+
+/**
+ * image_exists_read - has an image been found?
+ * @page: Output buffer
+ *
+ * Store 0 or 1 in page, depending on whether an image is found.
+ * Incoming buffer is PAGE_SIZE and result is guaranteed
+ * to be far less than that, so we don't worry about
+ * overflow.
+ **/
+int image_exists_read(const char *page, int count)
+{
+ int len = 0;
+ char *result;
+
+ if (toi_activate_storage(0))
+ return count;
+
+ if (!test_toi_state(TOI_RESUME_DEVICE_OK))
+ toi_attempt_to_parse_resume_device(0);
+
+ if (!toiActiveAllocator) {
+ len = sprintf((char *) page, "-1\n");
+ } else {
+ result = get_have_image_data();
+ if (result) {
+ len = sprintf((char *) page, "%s", result);
+ toi_free_page(26, (unsigned long) result);
+ }
+ }
+
+ toi_deactivate_storage(0);
+
+ return len;
+}
+
+/**
+ * image_exists_write - invalidate an image if one exists
+ **/
+int image_exists_write(const char *buffer, int count)
+{
+ if (toi_activate_storage(0))
+ return count;
+
+ if (toiActiveAllocator && toiActiveAllocator->image_exists(1))
+ toiActiveAllocator->remove_image();
+
+ toi_deactivate_storage(0);
+
+ clear_result_state(TOI_KEPT_IMAGE);
+
+ return count;
+}
diff --git a/kernel/power/tuxonice_io.h b/kernel/power/tuxonice_io.h
new file mode 100644
index 000000000..56645a5c6
--- /dev/null
+++ b/kernel/power/tuxonice_io.h
@@ -0,0 +1,72 @@
+/*
+ * kernel/power/tuxonice_io.h
+ *
+ * Copyright (C) 2005-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * It contains high level IO routines for hibernating.
+ *
+ */
+
+#include <linux/utsname.h>
+#include "tuxonice_pagedir.h"
+
+/* Non-module data saved in our image header */
+struct toi_header {
+ /*
+ * Mirror struct swsusp_info, but without
+ * the page aligned attribute
+ */
+ struct new_utsname uts;
+ u32 version_code;
+ unsigned long num_physpages;
+ int cpus;
+ unsigned long image_pages;
+ unsigned long pages;
+ unsigned long size;
+
+ /* Our own data */
+ unsigned long orig_mem_free;
+ int page_size;
+ int pageset_2_size;
+ int param0;
+ int param1;
+ int param2;
+ int param3;
+ int progress0;
+ int progress1;
+ int progress2;
+ int progress3;
+ int io_time[2][2];
+ struct pagedir pagedir;
+ dev_t root_fs;
+ unsigned long bkd; /* Boot kernel data locn */
+};
+
+extern int write_pageset(struct pagedir *pagedir);
+extern int write_image_header(void);
+extern int read_pageset1(void);
+extern int read_pageset2(int overwrittenpagesonly);
+
+extern int toi_attempt_to_parse_resume_device(int quiet);
+extern void attempt_to_parse_resume_device2(void);
+extern void attempt_to_parse_alt_resume_param(void);
+int image_exists_read(const char *page, int count);
+int image_exists_write(const char *buffer, int count);
+extern void save_restore_alt_param(int replace, int quiet);
+extern atomic_t toi_io_workers;
+
+/* Args to save_restore_alt_param */
+#define RESTORE 0
+#define SAVE 1
+
+#define NOQUIET 0
+#define QUIET 1
+
+extern wait_queue_head_t toi_io_queue_flusher;
+extern int toi_bio_queue_flusher_should_finish;
+
+int fs_info_space_needed(void);
+
+extern int toi_max_workers;
diff --git a/kernel/power/tuxonice_modules.c b/kernel/power/tuxonice_modules.c
new file mode 100644
index 000000000..18f22bdb6
--- /dev/null
+++ b/kernel/power/tuxonice_modules.c
@@ -0,0 +1,520 @@
+/*
+ * kernel/power/tuxonice_modules.c
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ */
+
+#include <linux/suspend.h>
+#include <linux/module.h>
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_ui.h"
+
+LIST_HEAD(toi_filters);
+LIST_HEAD(toiAllocators);
+
+LIST_HEAD(toi_modules);
+
+struct toi_module_ops *toiActiveAllocator;
+
+static int toi_num_filters;
+int toiNumAllocators, toi_num_modules;
+
+/*
+ * toi_header_storage_for_modules
+ *
+ * Returns the amount of space needed to store configuration
+ * data needed by the modules prior to copying back the original
+ * kernel. We can exclude data for pageset2 because it will be
+ * available anyway once the kernel is copied back.
+ */
+long toi_header_storage_for_modules(void)
+{
+ struct toi_module_ops *this_module;
+ int bytes = 0;
+
+ list_for_each_entry(this_module, &toi_modules, module_list) {
+ if (!this_module->enabled ||
+ (this_module->type == WRITER_MODULE &&
+ toiActiveAllocator != this_module))
+ continue;
+ if (this_module->storage_needed) {
+ int this = this_module->storage_needed() +
+ sizeof(struct toi_module_header) +
+ sizeof(int);
+ this_module->header_requested = this;
+ bytes += this;
+ }
+ }
+
+ /* One more for the empty terminator */
+ return bytes + sizeof(struct toi_module_header);
+}
+
+void print_toi_header_storage_for_modules(void)
+{
+ struct toi_module_ops *this_module;
+ int bytes = 0;
+
+ printk(KERN_DEBUG "Header storage:\n");
+ list_for_each_entry(this_module, &toi_modules, module_list) {
+ if (!this_module->enabled ||
+ (this_module->type == WRITER_MODULE &&
+ toiActiveAllocator != this_module))
+ continue;
+ if (this_module->storage_needed) {
+ int this = this_module->storage_needed() +
+ sizeof(struct toi_module_header) +
+ sizeof(int);
+ this_module->header_requested = this;
+ bytes += this;
+ printk(KERN_DEBUG "+ %16s : %-4d/%d.\n",
+ this_module->name,
+ this_module->header_used, this);
+ }
+ }
+
+ printk(KERN_DEBUG "+ empty terminator : %zu.\n",
+ sizeof(struct toi_module_header));
+ printk(KERN_DEBUG " ====\n");
+ printk(KERN_DEBUG " %zu\n",
+ bytes + sizeof(struct toi_module_header));
+}
+
+/*
+ * toi_memory_for_modules
+ *
+ * Returns the number of pages of memory requested by modules for
+ * doing their work during the cycle.
+ */
+
+long toi_memory_for_modules(int print_parts)
+{
+ long bytes = 0, result;
+ struct toi_module_ops *this_module;
+
+ if (print_parts)
+ printk(KERN_INFO "Memory for modules:\n===================\n");
+ list_for_each_entry(this_module, &toi_modules, module_list) {
+ int this;
+ if (!this_module->enabled)
+ continue;
+ if (this_module->memory_needed) {
+ this = this_module->memory_needed();
+ if (print_parts)
+ printk(KERN_INFO "%10d bytes (%5ld pages) for "
+ "module '%s'.\n", this,
+ DIV_ROUND_UP(this, PAGE_SIZE),
+ this_module->name);
+ bytes += this;
+ }
+ }
+
+ result = DIV_ROUND_UP(bytes, PAGE_SIZE);
+ if (print_parts)
+ printk(KERN_INFO " => %ld bytes, %ld pages.\n", bytes, result);
+
+ return result;
+}
+
+/*
+ * toi_expected_compression_ratio
+ *
+ * Returns the compression ratio expected when saving the image.
+ */
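+/*
+ * The ratios multiply: for example, if two enabled modules expect their
+ * output to be 50% and 80% of their input respectively, the overall
+ * expected ratio is 100 * 50/100 * 80/100 = 40%.
+ */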
+
+int toi_expected_compression_ratio(void)
+{
+ int ratio = 100;
+ struct toi_module_ops *this_module;
+
+ list_for_each_entry(this_module, &toi_modules, module_list) {
+ if (!this_module->enabled)
+ continue;
+ if (this_module->expected_compression)
+ ratio = ratio * this_module->expected_compression()
+ / 100;
+ }
+
+ return ratio;
+}
+
+/* toi_find_module_given_dir
+ * Functionality : Return a module (if found), given a pointer
+ * to its directory name
+ */
+
+static struct toi_module_ops *toi_find_module_given_dir(char *name)
+{
+ struct toi_module_ops *this_module, *found_module = NULL;
+
+ list_for_each_entry(this_module, &toi_modules, module_list) {
+ if (!strcmp(name, this_module->directory)) {
+ found_module = this_module;
+ break;
+ }
+ }
+
+ return found_module;
+}
+
+/* toi_find_module_given_name
+ * Functionality : Return a module (if found), given a pointer
+ * to its name
+ */
+
+struct toi_module_ops *toi_find_module_given_name(char *name)
+{
+ struct toi_module_ops *this_module, *found_module = NULL;
+
+ list_for_each_entry(this_module, &toi_modules, module_list) {
+ if (!strcmp(name, this_module->name)) {
+ found_module = this_module;
+ break;
+ }
+ }
+
+ return found_module;
+}
+
+/*
+ * toi_print_module_debug_info
+ * Functionality : Get debugging info from modules into a buffer.
+ */
+int toi_print_module_debug_info(char *buffer, int buffer_size)
+{
+ struct toi_module_ops *this_module;
+ int len = 0;
+
+ list_for_each_entry(this_module, &toi_modules, module_list) {
+ if (!this_module->enabled)
+ continue;
+ if (this_module->print_debug_info) {
+ int result;
+ result = this_module->print_debug_info(buffer + len,
+ buffer_size - len);
+ len += result;
+ }
+ }
+
+ /* Ensure null termination without writing past the end of the buffer */
+ buffer[buffer_size - 1] = '\0';
+
+ return len;
+}
+
+/*
+ * toi_register_module
+ *
+ * Register a module.
+ */
+int toi_register_module(struct toi_module_ops *module)
+{
+ int i;
+ struct kobject *kobj;
+
+ if (!hibernation_available())
+ return -ENODEV;
+
+ module->enabled = 1;
+
+ if (toi_find_module_given_name(module->name)) {
+ printk(KERN_INFO "TuxOnIce: Trying to load module %s,"
+ " which is already registered.\n",
+ module->name);
+ return -EBUSY;
+ }
+
+ switch (module->type) {
+ case FILTER_MODULE:
+ list_add_tail(&module->type_list, &toi_filters);
+ toi_num_filters++;
+ break;
+ case WRITER_MODULE:
+ list_add_tail(&module->type_list, &toiAllocators);
+ toiNumAllocators++;
+ break;
+ case MISC_MODULE:
+ case MISC_HIDDEN_MODULE:
+ case BIO_ALLOCATOR_MODULE:
+ break;
+ default:
+ printk(KERN_ERR "Hmmm. Module '%s' has an invalid type."
+ " It has been ignored.\n", module->name);
+ return -EINVAL;
+ }
+ list_add_tail(&module->module_list, &toi_modules);
+ toi_num_modules++;
+
+ if ((!module->directory && !module->shared_directory) ||
+ !module->sysfs_data || !module->num_sysfs_entries)
+ return 0;
+
+ /*
+ * Modules may share a directory, but those with shared_directory
+ * set must be loaded (via symbol dependencies) after their parents
+ * and unloaded before them.
+ */
+ if (module->shared_directory) {
+ struct toi_module_ops *shared =
+ toi_find_module_given_dir(module->shared_directory);
+ if (!shared) {
+ printk(KERN_ERR "TuxOnIce: Module %s wants to share "
+ "%s's directory but %s isn't loaded.\n",
+ module->name, module->shared_directory,
+ module->shared_directory);
+ toi_unregister_module(module);
+ return -ENODEV;
+ }
+ kobj = shared->dir_kobj;
+ } else {
+ if (!strncmp(module->directory, "[ROOT]", 6))
+ kobj = tuxonice_kobj;
+ else
+ kobj = make_toi_sysdir(module->directory);
+ }
+ module->dir_kobj = kobj;
+ for (i = 0; i < module->num_sysfs_entries; i++) {
+ int result = toi_register_sysfs_file(kobj,
+ &module->sysfs_data[i]);
+ if (result)
+ return result;
+ }
+ return 0;
+}
+
+/*
+ * toi_unregister_module
+ *
+ * Remove a module.
+ */
+void toi_unregister_module(struct toi_module_ops *module)
+{
+ int i;
+
+ if (module->dir_kobj)
+ for (i = 0; i < module->num_sysfs_entries; i++)
+ toi_unregister_sysfs_file(module->dir_kobj,
+ &module->sysfs_data[i]);
+
+ if (!module->shared_directory && module->directory &&
+ strncmp(module->directory, "[ROOT]", 6))
+ remove_toi_sysdir(module->dir_kobj);
+
+ switch (module->type) {
+ case FILTER_MODULE:
+ list_del(&module->type_list);
+ toi_num_filters--;
+ break;
+ case WRITER_MODULE:
+ list_del(&module->type_list);
+ toiNumAllocators--;
+ if (toiActiveAllocator == module) {
+ toiActiveAllocator = NULL;
+ clear_toi_state(TOI_CAN_RESUME);
+ clear_toi_state(TOI_CAN_HIBERNATE);
+ }
+ break;
+ case MISC_MODULE:
+ case MISC_HIDDEN_MODULE:
+ case BIO_ALLOCATOR_MODULE:
+ break;
+ default:
+ printk(KERN_ERR "Module '%s' has an invalid type."
+ " It has been ignored.\n", module->name);
+ return;
+ }
+ list_del(&module->module_list);
+ toi_num_modules--;
+}
+
+/*
+ * toi_move_module_tail
+ *
+ * Rearrange modules when reloading the config.
+ */
+void toi_move_module_tail(struct toi_module_ops *module)
+{
+ switch (module->type) {
+ case FILTER_MODULE:
+ if (toi_num_filters > 1)
+ list_move_tail(&module->type_list, &toi_filters);
+ break;
+ case WRITER_MODULE:
+ if (toiNumAllocators > 1)
+ list_move_tail(&module->type_list, &toiAllocators);
+ break;
+ case MISC_MODULE:
+ case MISC_HIDDEN_MODULE:
+ case BIO_ALLOCATOR_MODULE:
+ break;
+ default:
+ printk(KERN_ERR "Module '%s' has an invalid type."
+ " It has been ignored.\n", module->name);
+ return;
+ }
+ if ((toi_num_filters + toiNumAllocators) > 1)
+ list_move_tail(&module->module_list, &toi_modules);
+}
+
+/*
+ * toi_initialise_modules
+ *
+ * Get ready to do some work!
+ */
+int toi_initialise_modules(int starting_cycle, int early)
+{
+ struct toi_module_ops *this_module;
+ int result;
+
+ list_for_each_entry(this_module, &toi_modules, module_list) {
+ this_module->header_requested = 0;
+ this_module->header_used = 0;
+ if (!this_module->enabled)
+ continue;
+ if (this_module->early != early)
+ continue;
+ if (this_module->initialise) {
+ result = this_module->initialise(starting_cycle);
+ if (result) {
+ toi_cleanup_modules(starting_cycle);
+ return result;
+ }
+ this_module->initialised = 1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * toi_cleanup_modules
+ *
+ * Tell modules the work is done.
+ */
+void toi_cleanup_modules(int finishing_cycle)
+{
+ struct toi_module_ops *this_module;
+
+ list_for_each_entry(this_module, &toi_modules, module_list) {
+ if (!this_module->enabled || !this_module->initialised)
+ continue;
+ if (this_module->cleanup)
+ this_module->cleanup(finishing_cycle);
+ this_module->initialised = 0;
+ }
+}
+
+/*
+ * toi_pre_atomic_restore_modules
+ *
+ * Give modules a chance to act before the atomic restore is performed.
+ */
+void toi_pre_atomic_restore_modules(struct toi_boot_kernel_data *bkd)
+{
+ struct toi_module_ops *this_module;
+
+ list_for_each_entry(this_module, &toi_modules, module_list) {
+ if (this_module->enabled && this_module->pre_atomic_restore)
+ this_module->pre_atomic_restore(bkd);
+ }
+}
+
+/*
+ * toi_post_atomic_restore_modules
+ *
+ * Give modules a chance to act after the atomic restore has been performed.
+ */
+void toi_post_atomic_restore_modules(struct toi_boot_kernel_data *bkd)
+{
+ struct toi_module_ops *this_module;
+
+ list_for_each_entry(this_module, &toi_modules, module_list) {
+ if (this_module->enabled && this_module->post_atomic_restore)
+ this_module->post_atomic_restore(bkd);
+ }
+}
+
+/*
+ * toi_get_next_filter
+ *
+ * Get the next filter in the pipeline.
+ */
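+/*
+ * Passing NULL returns the first enabled filter; passing the last enabled
+ * filter returns toiActiveAllocator, so a caller can walk the pipeline
+ * (e.g. compression -> encryption -> block writer, to pick an illustrative
+ * ordering) with a sketch like:
+ *
+ *	struct toi_module_ops *ops = toi_get_next_filter(NULL);
+ *
+ *	while (ops != toiActiveAllocator) {
+ *		... feed data to ops ...
+ *		ops = toi_get_next_filter(ops);
+ *	}
+ */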
+struct toi_module_ops *toi_get_next_filter(struct toi_module_ops *filter_sought)
+{
+ struct toi_module_ops *last_filter = NULL, *this_filter = NULL;
+
+ list_for_each_entry(this_filter, &toi_filters, type_list) {
+ if (!this_filter->enabled)
+ continue;
+ if ((last_filter == filter_sought) || (!filter_sought))
+ return this_filter;
+ last_filter = this_filter;
+ }
+
+ return toiActiveAllocator;
+}
+
+/**
+ * toi_print_modules: Printk what support is loaded.
+ */
+void toi_print_modules(void)
+{
+ struct toi_module_ops *this_module;
+ int prev = 0;
+
+ printk(KERN_INFO "TuxOnIce " TOI_CORE_VERSION ", with support for");
+
+ list_for_each_entry(this_module, &toi_modules, module_list) {
+ if (this_module->type == MISC_HIDDEN_MODULE)
+ continue;
+ printk("%s %s%s%s", prev ? "," : "",
+ this_module->enabled ? "" : "[",
+ this_module->name,
+ this_module->enabled ? "" : "]");
+ prev = 1;
+ }
+
+ printk(".\n");
+}
+
+/* toi_get_modules
+ *
+ * Take a reference to modules so they can't go away under us.
+ */
+
+int toi_get_modules(void)
+{
+ struct toi_module_ops *this_module;
+
+ list_for_each_entry(this_module, &toi_modules, module_list) {
+ struct toi_module_ops *this_module2;
+
+ if (try_module_get(this_module->module))
+ continue;
+
+ /* Failed! Reverse gets and return error */
+ list_for_each_entry(this_module2, &toi_modules,
+ module_list) {
+ if (this_module == this_module2)
+ return -EINVAL;
+ module_put(this_module2->module);
+ }
+ }
+ return 0;
+}
+
+/* toi_put_modules
+ *
+ * Release our references to modules we used.
+ */
+
+void toi_put_modules(void)
+{
+ struct toi_module_ops *this_module;
+
+ list_for_each_entry(this_module, &toi_modules, module_list)
+ module_put(this_module->module);
+}
diff --git a/kernel/power/tuxonice_modules.h b/kernel/power/tuxonice_modules.h
new file mode 100644
index 000000000..34ffe2ee3
--- /dev/null
+++ b/kernel/power/tuxonice_modules.h
@@ -0,0 +1,212 @@
+/*
+ * kernel/power/tuxonice_modules.h
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * It contains declarations for modules. Modules are additions to
+ * TuxOnIce that provide facilities such as image compression or
+ * encryption, backends for storage of the image and user interfaces.
+ *
+ */
+
+#ifndef TOI_MODULES_H
+#define TOI_MODULES_H
+
+/* This is the maximum size we store in the image header for a module name */
+#define TOI_MAX_MODULE_NAME_LENGTH 30
+
+struct toi_boot_kernel_data;
+
+/* Per-module metadata */
+struct toi_module_header {
+ char name[TOI_MAX_MODULE_NAME_LENGTH];
+ int enabled;
+ int type;
+ int index;
+ int data_length;
+ unsigned long signature;
+};
+
+enum {
+ FILTER_MODULE,
+ WRITER_MODULE,
+ BIO_ALLOCATOR_MODULE,
+ MISC_MODULE,
+ MISC_HIDDEN_MODULE,
+};
+
+enum {
+ TOI_ASYNC,
+ TOI_SYNC
+};
+
+enum {
+ TOI_VIRT,
+ TOI_PAGE,
+};
+
+#define TOI_MAP(type, addr) \
+ (type == TOI_PAGE ? kmap(addr) : addr)
+
+#define TOI_UNMAP(type, addr) \
+ do { \
+ if (type == TOI_PAGE) \
+ kunmap(addr); \
+ } while(0)
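+
+/*
+ * A write_page() implementation might use these like this (a sketch only):
+ *
+ *	char *virt = TOI_MAP(buf_type, buf);
+ *	... copy or compress buf_size bytes from virt ...
+ *	TOI_UNMAP(buf_type, buf);
+ *
+ * With TOI_PAGE, buf is a struct page * that must be kmapped; with
+ * TOI_VIRT it is already a usable virtual address.
+ */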
+
+struct toi_module_ops {
+ /* Functions common to all modules */
+ int type;
+ char *name;
+ char *directory;
+ char *shared_directory;
+ struct kobject *dir_kobj;
+ struct module *module;
+ int enabled, early, initialised;
+ struct list_head module_list;
+
+ /* List of filters or allocators */
+ struct list_head list, type_list;
+
+ /*
+ * Requirements for memory during the cycle and for storage
+ * in the image header.
+ */
+ int (*memory_needed) (void);
+ int (*storage_needed) (void);
+
+ int header_requested, header_used;
+
+ int (*expected_compression) (void);
+
+ /*
+ * Debug info
+ */
+ int (*print_debug_info) (char *buffer, int size);
+ int (*save_config_info) (char *buffer);
+ void (*load_config_info) (char *buffer, int len);
+
+ /*
+ * Initialise & cleanup - general routines called
+ * at the start and end of a cycle.
+ */
+ int (*initialise) (int starting_cycle);
+ void (*cleanup) (int finishing_cycle);
+
+ void (*pre_atomic_restore) (struct toi_boot_kernel_data *bkd);
+ void (*post_atomic_restore) (struct toi_boot_kernel_data *bkd);
+
+ /*
+ * Calls for allocating storage (allocators only).
+ *
+ * Header space is requested separately and cannot fail, but the
+ * reservation is only applied when main storage is allocated.
+ * The header space reservation is thus always set prior to
+ * requesting the allocation of storage - and prior to querying
+ * how much storage is available.
+ */
+
+ unsigned long (*storage_available) (void);
+ void (*reserve_header_space) (unsigned long space_requested);
+ int (*register_storage) (void);
+ int (*allocate_storage) (unsigned long space_requested);
+ unsigned long (*storage_allocated) (void);
+ void (*free_unused_storage) (void);
+
+ /*
+ * Routines used in image I/O.
+ */
+ int (*rw_init) (int rw, int stream_number);
+ int (*rw_cleanup) (int rw);
+ int (*write_page) (unsigned long index, int buf_type, void *buf,
+ unsigned int buf_size);
+ int (*read_page) (unsigned long *index, int buf_type, void *buf,
+ unsigned int *buf_size);
+ int (*io_flusher) (int rw);
+
+ /* Reset module if image exists but reading aborted */
+ void (*noresume_reset) (void);
+
+ /* Read and write the metadata */
+ int (*write_header_init) (void);
+ int (*write_header_cleanup) (void);
+
+ int (*read_header_init) (void);
+ int (*read_header_cleanup) (void);
+
+ /* To be called after read_header_init */
+ int (*get_header_version) (void);
+
+ int (*rw_header_chunk) (int rw, struct toi_module_ops *owner,
+ char *buffer_start, int buffer_size);
+
+ int (*rw_header_chunk_noreadahead) (int rw,
+ struct toi_module_ops *owner, char *buffer_start,
+ int buffer_size);
+
+ /* Attempt to parse an image location */
+ int (*parse_sig_location) (char *buffer, int only_writer, int quiet);
+
+ /* Throttle I/O according to throughput */
+ void (*update_throughput_throttle) (int jif_index);
+
+ /* Flush outstanding I/O */
+ int (*finish_all_io) (void);
+
+ /* Determine whether an image exists that we can restore */
+ int (*image_exists) (int quiet);
+
+ /* Record that a resume from this image has been attempted */
+ int (*mark_resume_attempted) (int);
+
+ /* Destroy image if one exists */
+ int (*remove_image) (void);
+
+ /* Sysfs Data */
+ struct toi_sysfs_data *sysfs_data;
+ int num_sysfs_entries;
+
+ /* Block I/O allocator */
+ struct toi_bio_allocator_ops *bio_allocator_ops;
+};
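+
+/*
+ * A minimal, purely illustrative filter module (the names are hypothetical)
+ * only needs the common fields filled in before registration:
+ *
+ *	static struct toi_module_ops example_filter_ops = {
+ *		.type		= FILTER_MODULE,
+ *		.name		= "example filter",
+ *		.directory	= "example_filter",
+ *		.module		= THIS_MODULE,
+ *	};
+ *
+ *	// init:   return toi_register_module(&example_filter_ops);
+ *	// exit:   toi_unregister_module(&example_filter_ops);
+ *
+ * The hooks used by tuxonice_modules.c are all checked for NULL before
+ * being called, so a module only implements what it needs.
+ */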
+
+extern int toi_num_modules, toiNumAllocators;
+
+extern struct toi_module_ops *toiActiveAllocator;
+extern struct list_head toi_filters, toiAllocators, toi_modules;
+
+extern void toi_prepare_console_modules(void);
+extern void toi_cleanup_console_modules(void);
+
+extern struct toi_module_ops *toi_find_module_given_name(char *name);
+extern struct toi_module_ops *toi_get_next_filter(struct toi_module_ops *);
+
+extern int toi_register_module(struct toi_module_ops *module);
+extern void toi_move_module_tail(struct toi_module_ops *module);
+
+extern long toi_header_storage_for_modules(void);
+extern long toi_memory_for_modules(int print_parts);
+extern void print_toi_header_storage_for_modules(void);
+extern int toi_expected_compression_ratio(void);
+
+extern int toi_print_module_debug_info(char *buffer, int buffer_size);
+extern void toi_unregister_module(struct toi_module_ops *module);
+
+extern int toi_initialise_modules(int starting_cycle, int early);
+#define toi_initialise_modules_early(starting) \
+ toi_initialise_modules(starting, 1)
+#define toi_initialise_modules_late(starting) \
+ toi_initialise_modules(starting, 0)
+extern void toi_cleanup_modules(int finishing_cycle);
+
+extern void toi_post_atomic_restore_modules(struct toi_boot_kernel_data *bkd);
+extern void toi_pre_atomic_restore_modules(struct toi_boot_kernel_data *bkd);
+
+extern void toi_print_modules(void);
+
+int toi_get_modules(void);
+void toi_put_modules(void);
+#endif
diff --git a/kernel/power/tuxonice_netlink.c b/kernel/power/tuxonice_netlink.c
new file mode 100644
index 000000000..0db58af8b
--- /dev/null
+++ b/kernel/power/tuxonice_netlink.c
@@ -0,0 +1,324 @@
+/*
+ * kernel/power/tuxonice_netlink.c
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Functions for communicating with a userspace helper via netlink.
+ */
+
+#include <linux/suspend.h>
+#include <linux/sched.h>
+#include <linux/kmod.h>
+#include "tuxonice_netlink.h"
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_builtin.h"
+
+static struct user_helper_data *uhd_list;
+
+/*
+ * Refill our pool of SKBs for use in emergencies (e.g. when we are eating
+ * memory and none can be allocated).
+ */
+static void toi_fill_skb_pool(struct user_helper_data *uhd)
+{
+ while (uhd->pool_level < uhd->pool_limit) {
+ struct sk_buff *new_skb =
+ alloc_skb(NLMSG_SPACE(uhd->skb_size), TOI_ATOMIC_GFP);
+
+ if (!new_skb)
+ break;
+
+ new_skb->next = uhd->emerg_skbs;
+ uhd->emerg_skbs = new_skb;
+ uhd->pool_level++;
+ }
+}
+
+/*
+ * Try to allocate a single skb. If we can't get one, try to use one from
+ * our pool.
+ */
+static struct sk_buff *toi_get_skb(struct user_helper_data *uhd)
+{
+ struct sk_buff *skb =
+ alloc_skb(NLMSG_SPACE(uhd->skb_size), TOI_ATOMIC_GFP);
+
+ if (skb)
+ return skb;
+
+ skb = uhd->emerg_skbs;
+ if (skb) {
+ uhd->pool_level--;
+ uhd->emerg_skbs = skb->next;
+ skb->next = NULL;
+ }
+
+ return skb;
+}
+
+void toi_send_netlink_message(struct user_helper_data *uhd,
+ int type, void *params, size_t len)
+{
+ struct sk_buff *skb;
+ struct nlmsghdr *nlh;
+ void *dest;
+ struct task_struct *t;
+
+ if (uhd->pid == -1)
+ return;
+
+ if (uhd->debug)
+ printk(KERN_ERR "toi_send_netlink_message: Send "
+ "message type %d.\n", type);
+
+ skb = toi_get_skb(uhd);
+ if (!skb) {
+ printk(KERN_INFO "toi_netlink: Can't allocate skb!\n");
+ return;
+ }
+
+ nlh = nlmsg_put(skb, 0, uhd->sock_seq, type, len, 0);
+ uhd->sock_seq++;
+
+ dest = NLMSG_DATA(nlh);
+ if (params && len > 0)
+ memcpy(dest, params, len);
+
+ netlink_unicast(uhd->nl, skb, uhd->pid, 0);
+
+ toi_read_lock_tasklist();
+ t = find_task_by_pid_ns(uhd->pid, &init_pid_ns);
+ if (!t) {
+ toi_read_unlock_tasklist();
+ if (uhd->pid > -1)
+ printk(KERN_INFO "Hmm. Can't find the userspace task"
+ " %d.\n", uhd->pid);
+ return;
+ }
+ wake_up_process(t);
+ toi_read_unlock_tasklist();
+
+ yield();
+}
+
+static void send_whether_debugging(struct user_helper_data *uhd)
+{
+ static u8 is_debugging = 1;
+
+ toi_send_netlink_message(uhd, NETLINK_MSG_IS_DEBUGGING,
+ &is_debugging, sizeof(u8));
+}
+
+/*
+ * Set the PF_NOFREEZE flag on the given process to ensure it can run whilst we
+ * are hibernating.
+ */
+static int nl_set_nofreeze(struct user_helper_data *uhd, __u32 pid)
+{
+ struct task_struct *t;
+
+ if (uhd->debug)
+ printk(KERN_ERR "nl_set_nofreeze for pid %d.\n", pid);
+
+ toi_read_lock_tasklist();
+ t = find_task_by_pid_ns(pid, &init_pid_ns);
+ if (!t) {
+ toi_read_unlock_tasklist();
+ printk(KERN_INFO "Strange. Can't find the userspace task %d.\n",
+ pid);
+ return -EINVAL;
+ }
+
+ t->flags |= PF_NOFREEZE;
+
+ toi_read_unlock_tasklist();
+ uhd->pid = pid;
+
+ toi_send_netlink_message(uhd, NETLINK_MSG_NOFREEZE_ACK, NULL, 0);
+
+ return 0;
+}
+
+/*
+ * Called when the userspace process has informed us that it's ready to roll.
+ */
+static int nl_ready(struct user_helper_data *uhd, u32 version)
+{
+ if (version != uhd->interface_version) {
+ printk(KERN_INFO "%s userspace process using invalid interface"
+ " version (%d - kernel wants %d). Trying to "
+ "continue without it.\n",
+ uhd->name, version, uhd->interface_version);
+ if (uhd->not_ready)
+ uhd->not_ready();
+ return -EINVAL;
+ }
+
+ complete(&uhd->wait_for_process);
+
+ return 0;
+}
+
+void toi_netlink_close_complete(struct user_helper_data *uhd)
+{
+ if (uhd->nl) {
+ netlink_kernel_release(uhd->nl);
+ uhd->nl = NULL;
+ }
+
+ while (uhd->emerg_skbs) {
+ struct sk_buff *next = uhd->emerg_skbs->next;
+ kfree_skb(uhd->emerg_skbs);
+ uhd->emerg_skbs = next;
+ }
+
+ uhd->pid = -1;
+}
+
+static int toi_nl_gen_rcv_msg(struct user_helper_data *uhd,
+ struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+ int type = nlh->nlmsg_type;
+ int *data;
+ int err;
+
+ if (uhd->debug)
+ printk(KERN_ERR "toi_user_rcv_skb: Received message %d.\n",
+ type);
+
+ /* Let the more specific handler go first. It returns
+ * 1 for valid messages that it doesn't handle itself. */
+ err = uhd->rcv_msg(skb, nlh);
+ if (err != 1)
+ return err;
+
+ /* Only allow one task to receive NOFREEZE privileges */
+ if (type == NETLINK_MSG_NOFREEZE_ME && uhd->pid != -1) {
+ printk(KERN_INFO "Received extra nofreeze me requests.\n");
+ return -EBUSY;
+ }
+
+ data = NLMSG_DATA(nlh);
+
+ switch (type) {
+ case NETLINK_MSG_NOFREEZE_ME:
+ return nl_set_nofreeze(uhd, nlh->nlmsg_pid);
+ case NETLINK_MSG_GET_DEBUGGING:
+ send_whether_debugging(uhd);
+ return 0;
+ case NETLINK_MSG_READY:
+ if (nlh->nlmsg_len != NLMSG_LENGTH(sizeof(u32))) {
+ printk(KERN_INFO "Invalid ready mesage.\n");
+ if (uhd->not_ready)
+ uhd->not_ready();
+ return -EINVAL;
+ }
+ return nl_ready(uhd, (u32) *data);
+ case NETLINK_MSG_CLEANUP:
+ toi_netlink_close_complete(uhd);
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static void toi_user_rcv_skb(struct sk_buff *skb)
+{
+ int err;
+ struct nlmsghdr *nlh;
+ struct user_helper_data *uhd = uhd_list;
+
+ while (uhd && uhd->netlink_id != skb->sk->sk_protocol)
+ uhd = uhd->next;
+
+ if (!uhd)
+ return;
+
+ while (skb->len >= NLMSG_SPACE(0)) {
+ u32 rlen;
+
+ nlh = (struct nlmsghdr *) skb->data;
+ if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
+ return;
+
+ rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+ if (rlen > skb->len)
+ rlen = skb->len;
+
+ err = toi_nl_gen_rcv_msg(uhd, skb, nlh);
+ if (err)
+ netlink_ack(skb, nlh, err);
+ else if (nlh->nlmsg_flags & NLM_F_ACK)
+ netlink_ack(skb, nlh, 0);
+ skb_pull(skb, rlen);
+ }
+}
+
+static int netlink_prepare(struct user_helper_data *uhd)
+{
+ struct netlink_kernel_cfg cfg = {
+ .groups = 0,
+ .input = toi_user_rcv_skb,
+ };
+
+ uhd->next = uhd_list;
+ uhd_list = uhd;
+
+ uhd->sock_seq = 0x42c0ffee;
+ uhd->nl = netlink_kernel_create(&init_net, uhd->netlink_id, &cfg);
+ if (!uhd->nl) {
+ printk(KERN_INFO "Failed to allocate netlink socket for %s.\n",
+ uhd->name);
+ return -ENOMEM;
+ }
+
+ toi_fill_skb_pool(uhd);
+
+ return 0;
+}
+
+void toi_netlink_close(struct user_helper_data *uhd)
+{
+ struct task_struct *t;
+
+ toi_read_lock_tasklist();
+ t = find_task_by_pid_ns(uhd->pid, &init_pid_ns);
+ if (t)
+ t->flags &= ~PF_NOFREEZE;
+ toi_read_unlock_tasklist();
+
+ toi_send_netlink_message(uhd, NETLINK_MSG_CLEANUP, NULL, 0);
+}
+
+int toi_netlink_setup(struct user_helper_data *uhd)
+{
+ /* In case userui didn't cleanup properly on us */
+ toi_netlink_close_complete(uhd);
+
+ if (netlink_prepare(uhd) < 0) {
+ printk(KERN_INFO "Netlink prepare failed.\n");
+ return 1;
+ }
+
+ if (toi_launch_userspace_program(uhd->program, uhd->netlink_id,
+ UMH_WAIT_EXEC, uhd->debug) < 0) {
+ printk(KERN_INFO "Launch userspace program failed.\n");
+ toi_netlink_close_complete(uhd);
+ return 1;
+ }
+
+ /* Wait 2 seconds for the userspace process to make contact */
+ wait_for_completion_timeout(&uhd->wait_for_process, 2*HZ);
+
+ if (uhd->pid == -1) {
+ printk(KERN_INFO "%s: Failed to contact userspace process.\n",
+ uhd->name);
+ toi_netlink_close_complete(uhd);
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/kernel/power/tuxonice_netlink.h b/kernel/power/tuxonice_netlink.h
new file mode 100644
index 000000000..89e154599
--- /dev/null
+++ b/kernel/power/tuxonice_netlink.h
@@ -0,0 +1,62 @@
+/*
+ * kernel/power/tuxonice_netlink.h
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Declarations for functions for communicating with a userspace helper
+ * via netlink.
+ */
+
+#include <linux/netlink.h>
+#include <net/sock.h>
+
+#define NETLINK_MSG_BASE 0x10
+
+#define NETLINK_MSG_READY 0x10
+#define NETLINK_MSG_NOFREEZE_ME 0x16
+#define NETLINK_MSG_GET_DEBUGGING 0x19
+#define NETLINK_MSG_CLEANUP 0x24
+#define NETLINK_MSG_NOFREEZE_ACK 0x27
+#define NETLINK_MSG_IS_DEBUGGING 0x28
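+
+/*
+ * Rough message flow, as implemented in tuxonice_netlink.c: the helper
+ * sends NETLINK_MSG_READY with its interface version, and may send
+ * NETLINK_MSG_NOFREEZE_ME to have PF_NOFREEZE set on itself (answered
+ * with NETLINK_MSG_NOFREEZE_ACK). NETLINK_MSG_GET_DEBUGGING is answered
+ * with NETLINK_MSG_IS_DEBUGGING, and NETLINK_MSG_CLEANUP tears the
+ * channel down.
+ */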
+
+struct user_helper_data {
+ int (*rcv_msg) (struct sk_buff *skb, struct nlmsghdr *nlh);
+ void (*not_ready) (void);
+ struct sock *nl;
+ u32 sock_seq;
+ pid_t pid;
+ char *comm;
+ char program[256];
+ int pool_level;
+ int pool_limit;
+ struct sk_buff *emerg_skbs;
+ int skb_size;
+ int netlink_id;
+ char *name;
+ struct user_helper_data *next;
+ struct completion wait_for_process;
+ u32 interface_version;
+ int must_init;
+ int debug;
+};
+
+#ifdef CONFIG_NET
+int toi_netlink_setup(struct user_helper_data *uhd);
+void toi_netlink_close(struct user_helper_data *uhd);
+void toi_send_netlink_message(struct user_helper_data *uhd,
+ int type, void *params, size_t len);
+void toi_netlink_close_complete(struct user_helper_data *uhd);
+#else
+static inline int toi_netlink_setup(struct user_helper_data *uhd)
+{
+ return 0;
+}
+
+static inline void toi_netlink_close(struct user_helper_data *uhd) { };
+static inline void toi_send_netlink_message(struct user_helper_data *uhd,
+ int type, void *params, size_t len) { };
+static inline void toi_netlink_close_complete(struct user_helper_data *uhd)
+ { };
+#endif
diff --git a/kernel/power/tuxonice_pagedir.c b/kernel/power/tuxonice_pagedir.c
new file mode 100644
index 000000000..9ea185af1
--- /dev/null
+++ b/kernel/power/tuxonice_pagedir.c
@@ -0,0 +1,345 @@
+/*
+ * kernel/power/tuxonice_pagedir.c
+ *
+ * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
+ * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr>
+ * Copyright (C) 2006-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Routines for handling pagesets.
+ * Note that pbes aren't actually stored as such. They're stored as
+ * bitmaps and extents.
+ */
+
+#include <linux/suspend.h>
+#include <linux/highmem.h>
+#include <linux/bootmem.h>
+#include <linux/hardirq.h>
+#include <linux/sched.h>
+#include <linux/cpu.h>
+#include <asm/tlbflush.h>
+
+#include "tuxonice_pageflags.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_pagedir.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_alloc.h"
+
+static int ptoi_pfn;
+static struct pbe *this_low_pbe;
+static struct pbe **last_low_pbe_ptr;
+
+void toi_reset_alt_image_pageset2_pfn(void)
+{
+ memory_bm_position_reset(pageset2_map);
+}
+
+static struct page *first_conflicting_page;
+
+/*
+ * free_conflicting_pages
+ */
+
+static void free_conflicting_pages(void)
+{
+ while (first_conflicting_page) {
+ struct page *next =
+ *((struct page **) kmap(first_conflicting_page));
+ kunmap(first_conflicting_page);
+ toi__free_page(29, first_conflicting_page);
+ first_conflicting_page = next;
+ }
+}
+
+/* ___toi_get_nonconflicting_page
+ *
+ * Description: Gets order zero pages that won't be overwritten
+ * while copying the original pages.
+ */
+
+struct page *___toi_get_nonconflicting_page(int can_be_highmem)
+{
+ struct page *page;
+ gfp_t flags = TOI_ATOMIC_GFP;
+
+ if (can_be_highmem)
+ flags |= __GFP_HIGHMEM;
+
+ if (test_toi_state(TOI_LOADING_ALT_IMAGE) &&
+ pageset2_map && ptoi_pfn) {
+ do {
+ ptoi_pfn = memory_bm_next_pfn(pageset2_map, 0);
+ if (ptoi_pfn != BM_END_OF_MAP) {
+ page = pfn_to_page(ptoi_pfn);
+ if (!PagePageset1(page) &&
+ (can_be_highmem || !PageHighMem(page)))
+ return page;
+ }
+ } while (ptoi_pfn);
+ }
+
+ do {
+ page = toi_alloc_page(29, flags | __GFP_ZERO);
+ if (!page) {
+ printk(KERN_INFO "Failed to get nonconflicting "
+ "page.\n");
+ return NULL;
+ }
+ if (PagePageset1(page)) {
+ struct page **next = (struct page **) kmap(page);
+ *next = first_conflicting_page;
+ first_conflicting_page = page;
+ kunmap(page);
+ }
+ } while (PagePageset1(page));
+
+ return page;
+}
+
+unsigned long __toi_get_nonconflicting_page(void)
+{
+ struct page *page = ___toi_get_nonconflicting_page(0);
+ return page ? (unsigned long) page_address(page) : 0;
+}
+
+static struct pbe *get_next_pbe(struct page **page_ptr, struct pbe *this_pbe,
+ int highmem)
+{
+ if (((((unsigned long) this_pbe) & (PAGE_SIZE - 1))
+ + 2 * sizeof(struct pbe)) > PAGE_SIZE) {
+ struct page *new_page =
+ ___toi_get_nonconflicting_page(highmem);
+ if (!new_page)
+ return ERR_PTR(-ENOMEM);
+ this_pbe = (struct pbe *) kmap(new_page);
+ memset(this_pbe, 0, PAGE_SIZE);
+ *page_ptr = new_page;
+ } else
+ this_pbe++;
+
+ return this_pbe;
+}
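+
+/*
+ * Pbes are packed back to back into nonconflicting pages: struct pbe is
+ * three pointers (24 bytes on a typical 64-bit build), so roughly 170
+ * pbes fit in a 4K page. get_next_pbe() advances to the next slot in the
+ * current page, or grabs a fresh nonconflicting page as soon as another
+ * pbe would no longer fit.
+ */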
+
+/**
+ * toi_get_pageset1_load_addresses - generate pbes for conflicting pages
+ *
+ * We check here that the pagedir and the pages it points to won't
+ * collide with the pages to which we're going to restore the loaded
+ * pages later.
+ *
+ * Returns:
+ * Zero on success, -ENOMEM if we couldn't find enough pages (shouldn't
+ * happen).
+ **/
+int toi_get_pageset1_load_addresses(void)
+{
+ int pfn, highallocd = 0, lowallocd = 0;
+ int low_needed = pagedir1.size - get_highmem_size(pagedir1);
+ int high_needed = get_highmem_size(pagedir1);
+ int low_pages_for_highmem = 0;
+ gfp_t flags = GFP_ATOMIC | __GFP_NOWARN | __GFP_HIGHMEM;
+ struct page *page, *high_pbe_page = NULL, *last_high_pbe_page = NULL,
+ *low_pbe_page, *last_low_pbe_page = NULL;
+ struct pbe **last_high_pbe_ptr = &restore_highmem_pblist,
+ *this_high_pbe = NULL;
+ unsigned long orig_low_pfn, orig_high_pfn;
+ int high_pbes_done = 0, low_pbes_done = 0;
+ int low_direct = 0, high_direct = 0, result = 0, i;
+ int high_page = 1, high_offset = 0, low_page = 1, low_offset = 0;
+
+ toi_trace_index++;
+
+ memory_bm_position_reset(pageset1_map);
+ memory_bm_position_reset(pageset1_copy_map);
+
+ last_low_pbe_ptr = &restore_pblist;
+
+ /* First, allocate pages for the start of our pbe lists. */
+ if (high_needed) {
+ high_pbe_page = ___toi_get_nonconflicting_page(1);
+ if (!high_pbe_page) {
+ result = -ENOMEM;
+ goto out;
+ }
+ this_high_pbe = (struct pbe *) kmap(high_pbe_page);
+ memset(this_high_pbe, 0, PAGE_SIZE);
+ }
+
+ low_pbe_page = ___toi_get_nonconflicting_page(0);
+ if (!low_pbe_page) {
+ result = -ENOMEM;
+ goto out;
+ }
+ this_low_pbe = (struct pbe *) page_address(low_pbe_page);
+
+ /*
+ * Next, allocate the number of pages we need.
+ */
+
+ i = low_needed + high_needed;
+
+ do {
+ int is_high;
+
+ if (i == low_needed)
+ flags &= ~__GFP_HIGHMEM;
+
+ page = toi_alloc_page(30, flags);
+ BUG_ON(!page);
+
+ SetPagePageset1Copy(page);
+ is_high = PageHighMem(page);
+
+ if (PagePageset1(page)) {
+ if (is_high)
+ high_direct++;
+ else
+ low_direct++;
+ } else {
+ if (is_high)
+ highallocd++;
+ else
+ lowallocd++;
+ }
+ } while (--i);
+
+ high_needed -= high_direct;
+ low_needed -= low_direct;
+
+ /*
+ * Do we need to use some lowmem pages for the copies of highmem
+ * pages?
+ */
+ if (high_needed > highallocd) {
+ low_pages_for_highmem = high_needed - highallocd;
+ high_needed -= low_pages_for_highmem;
+ low_needed += low_pages_for_highmem;
+ }
+
+ /*
+ * Now generate our pbes (which will be used for the atomic restore),
+ * and free unneeded pages.
+ */
+ memory_bm_position_reset(pageset1_copy_map);
+ for (pfn = memory_bm_next_pfn(pageset1_copy_map, 0); pfn != BM_END_OF_MAP;
+ pfn = memory_bm_next_pfn(pageset1_copy_map, 0)) {
+ int is_high;
+ page = pfn_to_page(pfn);
+ is_high = PageHighMem(page);
+
+ if (PagePageset1(page))
+ continue;
+
+ /* Nope. We're going to use this page. Add a pbe. */
+ if (is_high || low_pages_for_highmem) {
+ struct page *orig_page;
+ high_pbes_done++;
+ if (!is_high)
+ low_pages_for_highmem--;
+ do {
+ orig_high_pfn = memory_bm_next_pfn(pageset1_map, 0);
+ BUG_ON(orig_high_pfn == BM_END_OF_MAP);
+ orig_page = pfn_to_page(orig_high_pfn);
+ } while (!PageHighMem(orig_page) ||
+ PagePageset1Copy(orig_page));
+
+ this_high_pbe->orig_address = (void *) orig_high_pfn;
+ this_high_pbe->address = page;
+ this_high_pbe->next = NULL;
+ toi_message(TOI_PAGEDIR, TOI_VERBOSE, 0, "High pbe %d/%d: %p(%lu)=>%p",
+ high_page, high_offset, page, orig_high_pfn, orig_page);
+ if (last_high_pbe_page != high_pbe_page) {
+ *last_high_pbe_ptr =
+ (struct pbe *) high_pbe_page;
+ if (last_high_pbe_page) {
+ kunmap(last_high_pbe_page);
+ high_page++;
+ high_offset = 0;
+ } else
+ high_offset++;
+ last_high_pbe_page = high_pbe_page;
+ } else {
+ *last_high_pbe_ptr = this_high_pbe;
+ high_offset++;
+ }
+ last_high_pbe_ptr = &this_high_pbe->next;
+ this_high_pbe = get_next_pbe(&high_pbe_page,
+ this_high_pbe, 1);
+ if (IS_ERR(this_high_pbe)) {
+ printk(KERN_INFO
+ "This high pbe is an error.\n");
+ return -ENOMEM;
+ }
+ } else {
+ struct page *orig_page;
+ low_pbes_done++;
+ do {
+ orig_low_pfn = memory_bm_next_pfn(pageset1_map, 0);
+ BUG_ON(orig_low_pfn == BM_END_OF_MAP);
+ orig_page = pfn_to_page(orig_low_pfn);
+ } while (PageHighMem(orig_page) ||
+ PagePageset1Copy(orig_page));
+
+ this_low_pbe->orig_address = page_address(orig_page);
+ this_low_pbe->address = page_address(page);
+ this_low_pbe->next = NULL;
+ toi_message(TOI_PAGEDIR, TOI_VERBOSE, 0, "Low pbe %d/%d: %p(%lu)=>%p",
+ low_page, low_offset, this_low_pbe->orig_address,
+ orig_low_pfn, this_low_pbe->address);
+ TOI_TRACE_DEBUG(orig_low_pfn, "LoadAddresses (%d/%d): %p=>%p", low_page, low_offset, this_low_pbe->orig_address, this_low_pbe->address);
+ *last_low_pbe_ptr = this_low_pbe;
+ last_low_pbe_ptr = &this_low_pbe->next;
+ this_low_pbe = get_next_pbe(&low_pbe_page,
+ this_low_pbe, 0);
+ if (low_pbe_page != last_low_pbe_page) {
+ if (last_low_pbe_page) {
+ low_page++;
+ low_offset = 0;
+ } else {
+ low_offset++;
+ }
+ last_low_pbe_page = low_pbe_page;
+ } else
+ low_offset++;
+ if (IS_ERR(this_low_pbe)) {
+ printk(KERN_INFO "this_low_pbe is an error.\n");
+ return -ENOMEM;
+ }
+ }
+ }
+
+ if (high_pbe_page)
+ kunmap(high_pbe_page);
+
+ if (last_high_pbe_page != high_pbe_page) {
+ if (last_high_pbe_page)
+ kunmap(last_high_pbe_page);
+ toi__free_page(29, high_pbe_page);
+ }
+
+ free_conflicting_pages();
+
+out:
+ return result;
+}
+
+int add_boot_kernel_data_pbe(void)
+{
+ this_low_pbe->address = (char *) __toi_get_nonconflicting_page();
+ if (!this_low_pbe->address) {
+ printk(KERN_INFO "Failed to get bkd atomic restore buffer.");
+ return -ENOMEM;
+ }
+
+ toi_bkd.size = sizeof(toi_bkd);
+ memcpy(this_low_pbe->address, &toi_bkd, sizeof(toi_bkd));
+
+ *last_low_pbe_ptr = this_low_pbe;
+ this_low_pbe->orig_address = (char *) boot_kernel_data_buffer;
+ this_low_pbe->next = NULL;
+ return 0;
+}
diff --git a/kernel/power/tuxonice_pagedir.h b/kernel/power/tuxonice_pagedir.h
new file mode 100644
index 000000000..80d1a3d8c
--- /dev/null
+++ b/kernel/power/tuxonice_pagedir.h
@@ -0,0 +1,50 @@
+/*
+ * kernel/power/tuxonice_pagedir.h
+ *
+ * Copyright (C) 2006-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Declarations for routines for handling pagesets.
+ */
+
+#ifndef KERNEL_POWER_PAGEDIR_H
+#define KERNEL_POWER_PAGEDIR_H
+
+/* Pagedir
+ *
+ * Contains the metadata for a set of pages saved in the image.
+ */
+
+struct pagedir {
+ int id;
+ unsigned long size;
+#ifdef CONFIG_HIGHMEM
+ unsigned long size_high;
+#endif
+};
+
+#ifdef CONFIG_HIGHMEM
+#define get_highmem_size(pagedir) (pagedir.size_high)
+#define set_highmem_size(pagedir, sz) do { pagedir.size_high = sz; } while (0)
+#define inc_highmem_size(pagedir) do { pagedir.size_high++; } while (0)
+#define get_lowmem_size(pagedir) (pagedir.size - pagedir.size_high)
+#else
+#define get_highmem_size(pagedir) (0)
+#define set_highmem_size(pagedir, sz) do { } while (0)
+#define inc_highmem_size(pagedir) do { } while (0)
+#define get_lowmem_size(pagedir) (pagedir.size)
+#endif
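+
+/*
+ * For example, toi_get_pageset1_load_addresses() splits its allocation
+ * targets with:
+ *
+ *	low_needed  = pagedir1.size - get_highmem_size(pagedir1);
+ *	high_needed = get_highmem_size(pagedir1);
+ *
+ * On !CONFIG_HIGHMEM builds the highmem count is always zero, so
+ * everything is treated as lowmem.
+ */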
+
+extern struct pagedir pagedir1, pagedir2;
+
+extern void toi_copy_pageset1(void);
+
+extern int toi_get_pageset1_load_addresses(void);
+
+extern unsigned long __toi_get_nonconflicting_page(void);
+struct page *___toi_get_nonconflicting_page(int can_be_highmem);
+
+extern void toi_reset_alt_image_pageset2_pfn(void);
+extern int add_boot_kernel_data_pbe(void);
+#endif
diff --git a/kernel/power/tuxonice_pageflags.c b/kernel/power/tuxonice_pageflags.c
new file mode 100644
index 000000000..307d09f33
--- /dev/null
+++ b/kernel/power/tuxonice_pageflags.c
@@ -0,0 +1,18 @@
+/*
+ * kernel/power/tuxonice_pageflags.c
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Routines for serialising and relocating pageflags in which we
+ * store our image metadata.
+ */
+
+#include "tuxonice_pageflags.h"
+#include "power.h"
+
+int toi_pageflags_space_needed(void)
+{
+ return memory_bm_space_needed(pageset1_map);
+}
diff --git a/kernel/power/tuxonice_pageflags.h b/kernel/power/tuxonice_pageflags.h
new file mode 100644
index 000000000..30ee577c3
--- /dev/null
+++ b/kernel/power/tuxonice_pageflags.h
@@ -0,0 +1,106 @@
+/*
+ * kernel/power/tuxonice_pageflags.h
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ */
+
+#ifndef KERNEL_POWER_TUXONICE_PAGEFLAGS_H
+#define KERNEL_POWER_TUXONICE_PAGEFLAGS_H
+
+struct memory_bitmap;
+void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
+void memory_bm_clear(struct memory_bitmap *bm);
+
+int mem_bm_set_bit_check(struct memory_bitmap *bm, int index, unsigned long pfn);
+void memory_bm_set_bit(struct memory_bitmap *bm, int index, unsigned long pfn);
+unsigned long memory_bm_next_pfn(struct memory_bitmap *bm, int index);
+unsigned long memory_bm_next_pfn_index(struct memory_bitmap *bm, int index);
+void memory_bm_position_reset(struct memory_bitmap *bm);
+int toi_alloc_bitmap(struct memory_bitmap **bm);
+void toi_free_bitmap(struct memory_bitmap **bm);
+void memory_bm_clear_bit(struct memory_bitmap *bm, int index, unsigned long pfn);
+int memory_bm_test_bit(struct memory_bitmap *bm, int index, unsigned long pfn);
+int memory_bm_test_bit_index(struct memory_bitmap *bm, int index, unsigned long pfn);
+void memory_bm_clear_bit_index(struct memory_bitmap *bm, int index, unsigned long pfn);
+
+struct toi_module_ops;
+int memory_bm_write(struct memory_bitmap *bm, int (*rw_chunk)
+ (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size));
+int memory_bm_read(struct memory_bitmap *bm, int (*rw_chunk)
+ (int rw, struct toi_module_ops *owner, char *buffer, int buffer_size));
+int memory_bm_space_needed(struct memory_bitmap *bm);
+
+extern struct memory_bitmap *pageset1_map;
+extern struct memory_bitmap *pageset1_copy_map;
+extern struct memory_bitmap *pageset2_map;
+extern struct memory_bitmap *page_resave_map;
+extern struct memory_bitmap *io_map;
+extern struct memory_bitmap *nosave_map;
+extern struct memory_bitmap *free_map;
+extern struct memory_bitmap *compare_map;
+
+#define PagePageset1(page) \
+ (pageset1_map && memory_bm_test_bit(pageset1_map, smp_processor_id(), page_to_pfn(page)))
+#define SetPagePageset1(page) \
+ (memory_bm_set_bit(pageset1_map, smp_processor_id(), page_to_pfn(page)))
+#define ClearPagePageset1(page) \
+ (memory_bm_clear_bit(pageset1_map, smp_processor_id(), page_to_pfn(page)))
+
+#define PagePageset1Copy(page) \
+ (memory_bm_test_bit(pageset1_copy_map, smp_processor_id(), page_to_pfn(page)))
+#define SetPagePageset1Copy(page) \
+ (memory_bm_set_bit(pageset1_copy_map, smp_processor_id(), page_to_pfn(page)))
+#define ClearPagePageset1Copy(page) \
+ (memory_bm_clear_bit(pageset1_copy_map, smp_processor_id(), page_to_pfn(page)))
+
+#define PagePageset2(page) \
+ (memory_bm_test_bit(pageset2_map, smp_processor_id(), page_to_pfn(page)))
+#define SetPagePageset2(page) \
+ (memory_bm_set_bit(pageset2_map, smp_processor_id(), page_to_pfn(page)))
+#define ClearPagePageset2(page) \
+ (memory_bm_clear_bit(pageset2_map, smp_processor_id(), page_to_pfn(page)))
+
+#define PageWasRW(page) \
+ (memory_bm_test_bit(pageset2_map, smp_processor_id(), page_to_pfn(page)))
+#define SetPageWasRW(page) \
+ (memory_bm_set_bit(pageset2_map, smp_processor_id(), page_to_pfn(page)))
+#define ClearPageWasRW(page) \
+ (memory_bm_clear_bit(pageset2_map, smp_processor_id(), page_to_pfn(page)))
+
+#define PageResave(page) (page_resave_map ? \
+ memory_bm_test_bit(page_resave_map, smp_processor_id(), page_to_pfn(page)) : 0)
+#define SetPageResave(page) \
+ (memory_bm_set_bit(page_resave_map, smp_processor_id(), page_to_pfn(page)))
+#define ClearPageResave(page) \
+ (memory_bm_clear_bit(page_resave_map, smp_processor_id(), page_to_pfn(page)))
+
+#define PageNosave(page) (nosave_map ? \
+ memory_bm_test_bit(nosave_map, smp_processor_id(), page_to_pfn(page)) : 0)
+#define SetPageNosave(page) \
+ (mem_bm_set_bit_check(nosave_map, smp_processor_id(), page_to_pfn(page)))
+#define ClearPageNosave(page) \
+ (memory_bm_clear_bit(nosave_map, smp_processor_id(), page_to_pfn(page)))
+
+#define PageNosaveFree(page) (free_map ? \
+ memory_bm_test_bit(free_map, smp_processor_id(), page_to_pfn(page)) : 0)
+#define SetPageNosaveFree(page) \
+ (memory_bm_set_bit(free_map, smp_processor_id(), page_to_pfn(page)))
+#define ClearPageNosaveFree(page) \
+ (memory_bm_clear_bit(free_map, smp_processor_id(), page_to_pfn(page)))
+
+#define PageCompareChanged(page) (compare_map ? \
+ memory_bm_test_bit(compare_map, smp_processor_id(), page_to_pfn(page)) : 0)
+#define SetPageCompareChanged(page) \
+ (memory_bm_set_bit(compare_map, smp_processor_id(), page_to_pfn(page)))
+#define ClearPageCompareChanged(page) \
+ (memory_bm_clear_bit(compare_map, smp_processor_id(), page_to_pfn(page)))
+
+extern void save_pageflags(struct memory_bitmap *pagemap);
+extern int load_pageflags(struct memory_bitmap *pagemap);
+extern int toi_pageflags_space_needed(void);
+#endif
diff --git a/kernel/power/tuxonice_power_off.c b/kernel/power/tuxonice_power_off.c
new file mode 100644
index 000000000..f8e969625
--- /dev/null
+++ b/kernel/power/tuxonice_power_off.c
@@ -0,0 +1,286 @@
+/*
+ * kernel/power/tuxonice_power_off.c
+ *
+ * Copyright (C) 2006-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Support for powering down.
+ */
+
+#include <linux/device.h>
+#include <linux/suspend.h>
+#include <linux/mm.h>
+#include <linux/pm.h>
+#include <linux/reboot.h>
+#include <linux/cpu.h>
+#include <linux/console.h>
+#include <linux/fs.h>
+#include "tuxonice.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_power_off.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_io.h"
+
+unsigned long toi_poweroff_method; /* 0 - Kernel power off */
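+
+/*
+ * Values of toi_poweroff_method as handled by __toi_power_down() below:
+ * 0 = plain kernel power off, 3 = suspend to RAM after the image is
+ * written ("suspend to both"), 4 = platform method via
+ * hibernation_platform_enter(), 5 = historic entry kept for
+ * compatibility. If the chosen method fails, we fall back to a normal
+ * power off or halt.
+ */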
+
+static int wake_delay;
+static char lid_state_file[256], wake_alarm_dir[256];
+static struct file *lid_file, *alarm_file, *epoch_file;
+static int post_wake_state = -1;
+
+static int did_suspend_to_both;
+
+/*
+ * __toi_power_down
+ * Functionality : Powers down or reboots the computer once the image
+ * has been written to disk.
+ * Key Assumptions : We are able to reboot/power down via the code called,
+ * or the warning emitted if those calls fail will be
+ * visible to the user (i.e. printk resumes devices).
+ */
+
+static void __toi_power_down(int method)
+{
+ int error;
+
+ toi_cond_pause(1, test_action_state(TOI_REBOOT) ? "Ready to reboot." :
+ "Powering down.");
+
+ if (test_result_state(TOI_ABORTED))
+ goto out;
+
+ if (test_action_state(TOI_REBOOT))
+ kernel_restart(NULL);
+
+ switch (method) {
+ case 0:
+ break;
+ case 3:
+ /*
+ * Re-read the overwritten part of pageset2 to make post-resume
+ * faster.
+ */
+ if (read_pageset2(1))
+ panic("Attempt to reload pagedir 2 failed. "
+ "Try rebooting.");
+
+ pm_prepare_console();
+
+ error = pm_notifier_call_chain(PM_SUSPEND_PREPARE);
+ if (!error) {
+ pm_restore_gfp_mask();
+ error = suspend_devices_and_enter(PM_SUSPEND_MEM);
+ pm_restrict_gfp_mask();
+ if (!error)
+ did_suspend_to_both = 1;
+ }
+ pm_notifier_call_chain(PM_POST_SUSPEND);
+ pm_restore_console();
+
+ /* Success - we're now post-resume-from-ram */
+ if (did_suspend_to_both)
+ return;
+
+ /* Failed to suspend to ram - do normal power off */
+ break;
+ case 4:
+ /*
+ * If succeeds, doesn't return. If fails, do a simple
+ * powerdown.
+ */
+ hibernation_platform_enter();
+ break;
+ case 5:
+ /* Historic entry only now */
+ break;
+ }
+
+ if (method && method != 5)
+ toi_cond_pause(1,
+ "Falling back to alternate power off method.");
+
+ if (test_result_state(TOI_ABORTED))
+ goto out;
+
+ if (pm_power_off)
+ kernel_power_off();
+ kernel_halt();
+ toi_cond_pause(1, "Powerdown failed.");
+ while (1)
+ cpu_relax();
+
+out:
+ if (read_pageset2(1))
+ panic("Attempt to reload pagedir 2 failed. Try rebooting.");
+ return;
+}
+
+#define CLOSE_FILE(file) \
+ do { \
+ if (file) { \
+ filp_close(file, NULL); \
+ file = NULL; \
+ } \
+ } while (0)
+
+static void powerdown_cleanup(int toi_or_resume)
+{
+ if (!toi_or_resume)
+ return;
+
+ CLOSE_FILE(lid_file);
+ CLOSE_FILE(alarm_file);
+ CLOSE_FILE(epoch_file);
+}
+
+static void open_file(char *format, char *arg, struct file **var, int mode,
+ char *desc)
+{
+ char buf[256];
+
+ if (strlen(arg)) {
+ sprintf(buf, format, arg);
+ *var = filp_open(buf, mode, 0);
+ if (IS_ERR(*var) || !*var) {
+ printk(KERN_INFO "Failed to open %s file '%s' (%p).\n",
+ desc, buf, *var);
+ *var = NULL;
+ }
+ }
+}
+
+static int powerdown_init(int toi_or_resume)
+{
+ if (!toi_or_resume)
+ return 0;
+
+ did_suspend_to_both = 0;
+
+ open_file("/proc/acpi/button/%s/state", lid_state_file, &lid_file,
+ O_RDONLY, "lid");
+
+ if (strlen(wake_alarm_dir)) {
+ open_file("/sys/class/rtc/%s/wakealarm", wake_alarm_dir,
+ &alarm_file, O_WRONLY, "alarm");
+
+ open_file("/sys/class/rtc/%s/since_epoch", wake_alarm_dir,
+ &epoch_file, O_RDONLY, "epoch");
+ }
+
+ return 0;
+}
+
+static int lid_closed(void)
+{
+ char array[25];
+ ssize_t size;
+ loff_t pos = 0;
+
+ if (!lid_file)
+ return 0;
+
+ size = vfs_read(lid_file, (char __user *) array, sizeof(array) - 1, &pos);
+ if ((int) size < 1) {
+ printk(KERN_INFO "Failed to read lid state file (%d).\n",
+ (int) size);
+ return 0;
+ }
+ array[size] = '\0';
+
+ if (!strcmp(array, "state: closed\n"))
+ return 1;
+
+ return 0;
+}
+
+static void write_alarm_file(int value)
+{
+ ssize_t size;
+ char buf[40];
+ loff_t pos = 0;
+
+ if (!alarm_file)
+ return;
+
+ sprintf(buf, "%d\n", value);
+
+ size = vfs_write(alarm_file, (char __user *)buf, strlen(buf), &pos);
+
+ if (size < 0)
+ printk(KERN_INFO "Error %d writing alarm value %s.\n",
+ (int) size, buf);
+}
+
+/**
+ * toi_check_resleep: See whether to powerdown again after waking.
+ *
+ * After waking, check whether we should powerdown again in a (usually
+ * different) way. We only do this if the lid switch is still closed.
+ */
+void toi_check_resleep(void)
+{
+ /* We only return if we suspended to ram and woke. */
+ if (lid_closed() && post_wake_state >= 0)
+ __toi_power_down(post_wake_state);
+}
+
+void toi_power_down(void)
+{
+ if (alarm_file && wake_delay) {
+ char array[25];
+ loff_t pos = 0;
+ ssize_t size = vfs_read(epoch_file, (char __user *) array,
+ sizeof(array) - 1, &pos);
+
+ if (((int) size) < 1)
+ printk(KERN_INFO "Failed to read epoch file (%d).\n",
+ (int) size);
+ else {
+ unsigned long since_epoch;
+
+ array[size] = '\0';
+ if (!kstrtoul(array, 0, &since_epoch)) {
+ /* Clear any wakeup time. */
+ write_alarm_file(0);
+
+ /* Set new wakeup time. */
+ write_alarm_file(since_epoch + wake_delay);
+ }
+ }
+ }
+
+ __toi_power_down(toi_poweroff_method);
+
+ toi_check_resleep();
+}
+
+static struct toi_sysfs_data sysfs_params[] = {
+#if defined(CONFIG_ACPI)
+ SYSFS_STRING("lid_file", SYSFS_RW, lid_state_file, 256, 0, NULL),
+ SYSFS_INT("wake_delay", SYSFS_RW, &wake_delay, 0, INT_MAX, 0, NULL),
+ SYSFS_STRING("wake_alarm_dir", SYSFS_RW, wake_alarm_dir, 256, 0, NULL),
+ SYSFS_INT("post_wake_state", SYSFS_RW, &post_wake_state, -1, 5, 0,
+ NULL),
+ SYSFS_UL("powerdown_method", SYSFS_RW, &toi_poweroff_method, 0, 5, 0),
+ SYSFS_INT("did_suspend_to_both", SYSFS_READONLY, &did_suspend_to_both,
+ 0, 0, 0, NULL)
+#endif
+};
+
+static struct toi_module_ops powerdown_ops = {
+ .type = MISC_HIDDEN_MODULE,
+ .name = "poweroff",
+ .initialise = powerdown_init,
+ .cleanup = powerdown_cleanup,
+ .directory = "[ROOT]",
+ .module = THIS_MODULE,
+ .sysfs_data = sysfs_params,
+ .num_sysfs_entries = sizeof(sysfs_params) /
+ sizeof(struct toi_sysfs_data),
+};
+
+int toi_poweroff_init(void)
+{
+ return toi_register_module(&powerdown_ops);
+}
+
+void toi_poweroff_exit(void)
+{
+ toi_unregister_module(&powerdown_ops);
+}
diff --git a/kernel/power/tuxonice_power_off.h b/kernel/power/tuxonice_power_off.h
new file mode 100644
index 000000000..6e1d8bb39
--- /dev/null
+++ b/kernel/power/tuxonice_power_off.h
@@ -0,0 +1,24 @@
+/*
+ * kernel/power/tuxonice_power_off.h
+ *
+ * Copyright (C) 2006-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Support for powering down.
+ */
+
+int toi_pm_state_finish(void);
+void toi_power_down(void);
+extern unsigned long toi_poweroff_method;
+int toi_poweroff_init(void);
+void toi_poweroff_exit(void);
+void toi_check_resleep(void);
+
+extern int platform_begin(int platform_mode);
+extern int platform_pre_snapshot(int platform_mode);
+extern void platform_leave(int platform_mode);
+extern void platform_end(int platform_mode);
+extern void platform_finish(int platform_mode);
+extern int platform_pre_restore(int platform_mode);
+extern void platform_restore_cleanup(int platform_mode);
diff --git a/kernel/power/tuxonice_prepare_image.c b/kernel/power/tuxonice_prepare_image.c
new file mode 100644
index 000000000..e0593252f
--- /dev/null
+++ b/kernel/power/tuxonice_prepare_image.c
@@ -0,0 +1,1080 @@
+/*
+ * kernel/power/tuxonice_prepare_image.c
+ *
+ * Copyright (C) 2003-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * We need to eat memory until we can:
+ * 1. Perform the save without changing anything (RAM_NEEDED < #pages)
+ * 2. Fit it all in available space (toiActiveAllocator->available_space() >=
+ * main_storage_needed())
+ * 3. Reload the pagedir and pageset1 to places that don't collide with their
+ * final destinations, not knowing to what extent the resumed kernel will
+ * overlap with the one loaded at boot time. I think the resumed kernel
+ * should overlap completely, but I don't want to rely on this as it is
+ * an unproven assumption. We therefore assume there will be no overlap at
+ * all (worst case).
+ * 4. Meet the user's requested limit (if any) on the size of the image.
+ * The limit is in MB, so pages/256 (assuming 4K pages).
+ *
+ */
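+
+/*
+ * For point 4 above: with 4K pages there are 256 pages per MB, so an
+ * image_size_limit of, say, 512 MB corresponds to 512 * 256 = 131072
+ * pages of image data.
+ */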
+
+#include <linux/highmem.h>
+#include <linux/freezer.h>
+#include <linux/hardirq.h>
+#include <linux/mmzone.h>
+#include <linux/console.h>
+#include <linux/tuxonice.h>
+
+#include "tuxonice_pageflags.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_io.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice.h"
+#include "tuxonice_extent.h"
+#include "tuxonice_checksum.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_atomic_copy.h"
+#include "tuxonice_builtin.h"
+
+static unsigned long num_nosave, main_storage_allocated, storage_limit,
+ header_storage_needed;
+unsigned long extra_pd1_pages_allowance =
+ CONFIG_TOI_DEFAULT_EXTRA_PAGES_ALLOWANCE;
+long image_size_limit = CONFIG_TOI_DEFAULT_IMAGE_SIZE_LIMIT;
+static int no_ps2_needed;
+
+struct attention_list {
+ struct task_struct *task;
+ struct attention_list *next;
+};
+
+static struct attention_list *attention_list;
+
+#define PAGESET1 0
+#define PAGESET2 1
+
+void free_attention_list(void)
+{
+ struct attention_list *last = NULL;
+
+ while (attention_list) {
+ last = attention_list;
+ attention_list = attention_list->next;
+ toi_kfree(6, last, sizeof(*last));
+ }
+}
+
+static int build_attention_list(void)
+{
+ int i, task_count = 0;
+ struct task_struct *p;
+ struct attention_list *next;
+
+ /*
+ * Count all processes marked PF_NOFREEZE, plus the current task.
+ */
+ toi_read_lock_tasklist();
+ for_each_process(p)
+ if ((p->flags & PF_NOFREEZE) || p == current)
+ task_count++;
+ toi_read_unlock_tasklist();
+
+ /*
+ * Allocate attention list structs.
+ */
+ for (i = 0; i < task_count; i++) {
+ struct attention_list *this =
+ toi_kzalloc(6, sizeof(struct attention_list),
+ TOI_WAIT_GFP);
+ if (!this) {
+ printk(KERN_INFO "Failed to allocate slab for "
+ "attention list.\n");
+ free_attention_list();
+ return 1;
+ }
+ this->next = NULL;
+ if (attention_list)
+ this->next = attention_list;
+ attention_list = this;
+ }
+
+ next = attention_list;
+ toi_read_lock_tasklist();
+ for_each_process(p)
+ if ((p->flags & PF_NOFREEZE) || p == current) {
+ next->task = p;
+ next = next->next;
+ }
+ toi_read_unlock_tasklist();
+ return 0;
+}
+
+static void pageset2_full(void)
+{
+ struct zone *zone;
+ struct page *page;
+ unsigned long flags;
+ int i;
+
+ toi_trace_index++;
+
+ for_each_populated_zone(zone) {
+ spin_lock_irqsave(&zone->lru_lock, flags);
+ for_each_lru(i) {
+ if (!zone_page_state(zone, NR_LRU_BASE + i))
+ continue;
+
+ list_for_each_entry(page, &zone->lruvec.lists[i], lru) {
+ struct address_space *mapping;
+
+ mapping = page_mapping(page);
+ if (!mapping || !mapping->host ||
+ !(mapping->host->i_flags & S_ATOMIC_COPY)) {
+ if (PageTOI_RO(page) && test_result_state(TOI_KEPT_IMAGE)) {
+ TOI_TRACE_DEBUG(page_to_pfn(page), "_Pageset2 unmodified.");
+ } else {
+ TOI_TRACE_DEBUG(page_to_pfn(page), "_Pageset2 pageset2_full.");
+ SetPagePageset2(page);
+ }
+ }
+ }
+ }
+ spin_unlock_irqrestore(&zone->lru_lock, flags);
+ }
+}
+
+/*
+ * toi_mark_task_as_pageset
+ * Functionality : Marks all the saveable pages belonging to a given process
+ * as belonging to a particular pageset.
+ */
+
+static void toi_mark_task_as_pageset(struct task_struct *t, int pageset2)
+{
+ struct vm_area_struct *vma;
+ struct mm_struct *mm;
+
+ mm = t->active_mm;
+
+ if (!mm || !mm->mmap)
+ return;
+
+ toi_trace_index++;
+
+ if (!irqs_disabled())
+ down_read(&mm->mmap_sem);
+
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ unsigned long posn;
+
+ if (!vma->vm_start ||
+ vma->vm_flags & VM_PFNMAP)
+ continue;
+
+ for (posn = vma->vm_start; posn < vma->vm_end;
+ posn += PAGE_SIZE) {
+ struct page *page = follow_page(vma, posn, 0);
+ struct address_space *mapping;
+
+ if (!page || !pfn_valid(page_to_pfn(page)))
+ continue;
+
+ mapping = page_mapping(page);
+ if (mapping && mapping->host &&
+ mapping->host->i_flags & S_ATOMIC_COPY && pageset2)
+ continue;
+
+ if (PageTOI_RO(page) && test_result_state(TOI_KEPT_IMAGE)) {
+ TOI_TRACE_DEBUG(page_to_pfn(page), "_Unmodified %d", pageset2 ? 1 : 2);
+ continue;
+ }
+
+ if (pageset2) {
+ TOI_TRACE_DEBUG(page_to_pfn(page), "_MarkTaskAsPageset 1");
+ SetPagePageset2(page);
+ } else {
+ TOI_TRACE_DEBUG(page_to_pfn(page), "_MarkTaskAsPageset 2");
+ ClearPagePageset2(page);
+ SetPagePageset1(page);
+ }
+ }
+ }
+
+ if (!irqs_disabled())
+ up_read(&mm->mmap_sem);
+}
+
+static void mark_tasks(int pageset)
+{
+ struct task_struct *p;
+
+ toi_read_lock_tasklist();
+ for_each_process(p) {
+ if (!p->mm)
+ continue;
+
+ if (p->flags & PF_KTHREAD)
+ continue;
+
+ toi_mark_task_as_pageset(p, pageset);
+ }
+ toi_read_unlock_tasklist();
+
+}
+
+/* toi_mark_pages_for_pageset2
+ *
+ * Description: Mark unshared pages in processes not needed for hibernate as
+ * being able to be written out in a separate pagedir.
+ * HighMem pages are simply marked as pageset2. They won't be
+ * needed during hibernate.
+ */
+
+static void toi_mark_pages_for_pageset2(void)
+{
+ struct attention_list *this = attention_list;
+
+ memory_bm_clear(pageset2_map);
+
+ if (test_action_state(TOI_NO_PAGESET2) || no_ps2_needed)
+ return;
+
+ if (test_action_state(TOI_PAGESET2_FULL))
+ pageset2_full();
+ else
+ mark_tasks(PAGESET2);
+
+ /*
+ * Because the tasks in attention_list are ones related to hibernating,
+ * we know that they won't go away under us.
+ */
+
+ while (this) {
+ if (!test_result_state(TOI_ABORTED))
+ toi_mark_task_as_pageset(this->task, PAGESET1);
+ this = this->next;
+ }
+}
+
+/*
+ * The atomic copy of pageset1 is stored in pageset2 pages.
+ * But if pageset1 is larger (normally only just after boot),
+ * we need to allocate extra pages to store the atomic copy.
+ * The following data struct and functions are used to handle
+ * the allocation and freeing of that memory.
+ */
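+
+/*
+ * Worked example (hypothetical numbers): if pageset1 holds 60000 pages but
+ * pageset2 holds only 40000, the atomic copy needs 20000 extra destination
+ * pages. They are allocated below, recorded in extras_list and marked
+ * Nosave/Pageset1Copy so that they can be freed again afterwards by
+ * toi_free_extra_pagedir_memory().
+ */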
+
+static unsigned long extra_pages_allocated;
+
+struct extras {
+ struct page *page;
+ int order;
+ struct extras *next;
+};
+
+static struct extras *extras_list;
+
+/* toi_free_extra_pagedir_memory
+ *
+ * Description: Free previously allocated extra pagedir memory.
+ */
+void toi_free_extra_pagedir_memory(void)
+{
+ /* Free allocated pages */
+ while (extras_list) {
+ struct extras *this = extras_list;
+ int i;
+
+ extras_list = this->next;
+
+ for (i = 0; i < (1 << this->order); i++)
+ ClearPageNosave(this->page + i);
+
+ toi_free_pages(9, this->page, this->order);
+ toi_kfree(7, this, sizeof(*this));
+ }
+
+ extra_pages_allocated = 0;
+}
+
+/* toi_allocate_extra_pagedir_memory
+ *
+ * Description: Allocate memory for making the atomic copy of pagedir1 in the
+ * case where it is bigger than pagedir2.
+ * Arguments: int num_to_alloc: Number of extra pages needed.
+ * Result: int. Number of extra pages we now have allocated.
+ */
+static int toi_allocate_extra_pagedir_memory(int extra_pages_needed)
+{
+ int j, order, num_to_alloc = extra_pages_needed - extra_pages_allocated;
+ gfp_t flags = TOI_ATOMIC_GFP;
+
+ if (num_to_alloc < 1)
+ return 0;
+
+ order = fls(num_to_alloc);
+ if (order >= MAX_ORDER)
+ order = MAX_ORDER - 1;
+
+ while (num_to_alloc) {
+ struct page *newpage;
+ unsigned long virt;
+ struct extras *extras_entry;
+
+ while ((1 << order) > num_to_alloc)
+ order--;
+
+ extras_entry = (struct extras *) toi_kzalloc(7,
+ sizeof(struct extras), TOI_ATOMIC_GFP);
+
+ if (!extras_entry)
+ return extra_pages_allocated;
+
+ virt = toi_get_free_pages(9, flags, order);
+ while (!virt && order) {
+ order--;
+ virt = toi_get_free_pages(9, flags, order);
+ }
+
+ if (!virt) {
+ toi_kfree(7, extras_entry, sizeof(*extras_entry));
+ return extra_pages_allocated;
+ }
+
+ newpage = virt_to_page(virt);
+
+ extras_entry->page = newpage;
+ extras_entry->order = order;
+ extras_entry->next = extras_list;
+
+ extras_list = extras_entry;
+
+ for (j = 0; j < (1 << order); j++) {
+ SetPageNosave(newpage + j);
+ SetPagePageset1Copy(newpage + j);
+ }
+
+ extra_pages_allocated += (1 << order);
+ num_to_alloc -= (1 << order);
+ }
+
+ return extra_pages_allocated;
+}
+
+/*
+ * real_nr_free_pages: Count free pages, including those still sitting on the
+ * per-cpu (pcp) lists, for the zones selected by zone_idx_mask (a bitmask of
+ * zone_idx() bits; pass all_zones_mask for every zone).
+ */
+unsigned long real_nr_free_pages(unsigned long zone_idx_mask)
+{
+ struct zone *zone;
+ unsigned long result = 0;
+ int cpu;
+
+ /* PCP lists */
+ for_each_populated_zone(zone) {
+ if (!(zone_idx_mask & (1 << zone_idx(zone))))
+ continue;
+
+ for_each_online_cpu(cpu) {
+ struct per_cpu_pageset *pset =
+ per_cpu_ptr(zone->pageset, cpu);
+ struct per_cpu_pages *pcp = &pset->pcp;
+ result += pcp->count;
+ }
+
+ result += zone_page_state(zone, NR_FREE_PAGES);
+ }
+ return result;
+}
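+
+/*
+ * Usage sketch: the wrappers in tuxonice_prepare_image.h build the mask from
+ * zone_idx() bits, e.g. real_nr_free_pages(1 << ZONE_HIGHMEM) counts only
+ * highmem, while real_nr_free_pages(all_zones_mask) counts every populated
+ * zone, per-cpu pages included.
+ */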
+
+/*
+ * Discover how much extra memory will be required by the drivers
+ * when they're asked to hibernate. We can then ensure that amount
+ * of memory is available when we really want it.
+ */
+static void get_extra_pd1_allowance(void)
+{
+ unsigned long orig_num_free = real_nr_free_pages(all_zones_mask), final;
+
+ toi_prepare_status(CLEAR_BAR, "Finding allowance for drivers.");
+
+ if (toi_go_atomic(PMSG_FREEZE, 1))
+ return;
+
+ final = real_nr_free_pages(all_zones_mask);
+ toi_end_atomic(ATOMIC_ALL_STEPS, 1, 0);
+
+ extra_pd1_pages_allowance = (orig_num_free > final) ?
+ orig_num_free - final + MIN_EXTRA_PAGES_ALLOWANCE :
+ MIN_EXTRA_PAGES_ALLOWANCE;
+}
+
+/*
+ * Amount of storage needed, possibly taking into account the
+ * expected compression ratio and possibly also ignoring our
+ * allowance for extra pages.
+ */
+static unsigned long main_storage_needed(int use_ecr,
+ int ignore_extra_pd1_allow)
+{
+ return (pagedir1.size + pagedir2.size +
+ (ignore_extra_pd1_allow ? 0 : extra_pd1_pages_allowance)) *
+ (use_ecr ? toi_expected_compression_ratio() : 100) / 100;
+}
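+
+/*
+ * Worked example (hypothetical numbers): with pagedir1.size = 50000,
+ * pagedir2.size = 30000, an extra allowance of 500 pages and an expected
+ * compression ratio of 60%, main_storage_needed(1, 0) comes to
+ * (50000 + 30000 + 500) * 60 / 100 = 48300 pages of storage.
+ */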
+
+/*
+ * Storage needed for the image header: calculated in bytes, returned in pages.
+ */
+unsigned long get_header_storage_needed(void)
+{
+ unsigned long bytes = sizeof(struct toi_header) +
+ toi_header_storage_for_modules() +
+ toi_pageflags_space_needed() +
+ fs_info_space_needed();
+
+ return DIV_ROUND_UP(bytes, PAGE_SIZE);
+}
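+
+/*
+ * Example: assuming 4096-byte pages, a header needing 9000 bytes takes
+ * DIV_ROUND_UP(9000, 4096) = 3 pages of storage.
+ */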
+
+/*
+ * When freeing memory, pages from either pageset might be freed.
+ *
+ * When seeking to free memory in order to hibernate, every ps1 page freed
+ * means two fewer pages are needed for the atomic copy: there is one less
+ * page to copy and one more page into which data can be copied.
+ *
+ * Freeing ps2 pages saves us nothing directly. No more memory is available
+ * for the atomic copy. Indirectly, a ps1 page might be freed (slab?), but
+ * that's too much work to figure out.
+ *
+ * => ps1_to_free functions
+ *
+ * Of course, if we just want to reduce the image size because of storage
+ * limitations or an image size limit, either pageset will do.
+ *
+ * => any_to_free function
+ */
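+
+/*
+ * Illustration of the divide-by-two (hypothetical numbers): if pagedir1
+ * needs 1000 pages copied but only 900 destinations exist, the shortfall is
+ * 100. Freeing one ps1 page leaves 999 to copy into 901 destinations, so
+ * each freed ps1 page closes the gap by two - hence
+ * DIV_ROUND_UP(needed - available, 2) in the *_ps1_to_free() helpers below.
+ */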
+
+static unsigned long lowpages_usable_for_highmem_copy(void)
+{
+ unsigned long needed = get_lowmem_size(pagedir1) +
+ extra_pd1_pages_allowance + MIN_FREE_RAM +
+ toi_memory_for_modules(0),
+ available = get_lowmem_size(pagedir2) +
+ real_nr_free_low_pages() + extra_pages_allocated;
+
+ return available > needed ? available - needed : 0;
+}
+
+static unsigned long highpages_ps1_to_free(void)
+{
+ unsigned long need = get_highmem_size(pagedir1),
+ available = get_highmem_size(pagedir2) +
+ real_nr_free_high_pages() +
+ lowpages_usable_for_highmem_copy();
+
+ return need > available ? DIV_ROUND_UP(need - available, 2) : 0;
+}
+
+static unsigned long lowpages_ps1_to_free(void)
+{
+ unsigned long needed = get_lowmem_size(pagedir1) +
+ extra_pd1_pages_allowance + MIN_FREE_RAM +
+ toi_memory_for_modules(0),
+ available = get_lowmem_size(pagedir2) +
+ real_nr_free_low_pages() + extra_pages_allocated;
+
+ return needed > available ? DIV_ROUND_UP(needed - available, 2) : 0;
+}
+
+static unsigned long current_image_size(void)
+{
+ return pagedir1.size + pagedir2.size + header_storage_needed;
+}
+
+static unsigned long storage_still_required(void)
+{
+ unsigned long needed = main_storage_needed(1, 1);
+ return needed > storage_limit ? needed - storage_limit : 0;
+}
+
+static unsigned long ram_still_required(void)
+{
+ unsigned long needed = MIN_FREE_RAM + toi_memory_for_modules(0) +
+ 2 * extra_pd1_pages_allowance,
+ available = real_nr_free_low_pages() + extra_pages_allocated;
+ return needed > available ? needed - available : 0;
+}
+
+unsigned long any_to_free(int use_image_size_limit)
+{
+ int use_soft_limit = use_image_size_limit && image_size_limit > 0;
+ unsigned long current_size = current_image_size(),
+ soft_limit = use_soft_limit ? (image_size_limit << 8) : 0,
+ to_free = use_soft_limit ? (current_size > soft_limit ?
+ current_size - soft_limit : 0) : 0,
+ storage_to_free = storage_still_required(),
+ ram_to_free = ram_still_required(),
+ first_max = max(to_free, storage_to_free);
+
+ return max(first_max, ram_to_free);
+}
+
+static int need_pageset2(void)
+{
+ return (real_nr_free_low_pages() + extra_pages_allocated -
+ 2 * extra_pd1_pages_allowance - MIN_FREE_RAM -
+ toi_memory_for_modules(0) - pagedir1.size) < pagedir2.size;
+}
+
+/* amount_needed
+ *
+ * Calculates the amount by which the image size needs to be reduced to meet
+ * our constraints.
+ */
+static unsigned long amount_needed(int use_image_size_limit)
+{
+ return max(highpages_ps1_to_free() + lowpages_ps1_to_free(),
+ any_to_free(use_image_size_limit));
+}
+
+static int image_not_ready(int use_image_size_limit)
+{
+ toi_message(TOI_EAT_MEMORY, TOI_LOW, 1,
+ "Amount still needed (%lu) > 0:%u,"
+ " Storage allocd: %lu < %lu: %u.\n",
+ amount_needed(use_image_size_limit),
+ (amount_needed(use_image_size_limit) > 0),
+ main_storage_allocated,
+ main_storage_needed(1, 1),
+ main_storage_allocated < main_storage_needed(1, 1));
+
+ toi_cond_pause(0, NULL);
+
+ return (amount_needed(use_image_size_limit) > 0) ||
+ main_storage_allocated < main_storage_needed(1, 1);
+}
+
+static void display_failure_reason(int tries_exceeded)
+{
+ unsigned long storage_required = storage_still_required(),
+ ram_required = ram_still_required(),
+ high_ps1 = highpages_ps1_to_free(),
+ low_ps1 = lowpages_ps1_to_free();
+
+ printk(KERN_INFO "Failed to prepare the image because...\n");
+
+ if (!storage_limit) {
+ printk(KERN_INFO "- You need some storage available to be "
+ "able to hibernate.\n");
+ return;
+ }
+
+ if (tries_exceeded)
+ printk(KERN_INFO "- The maximum number of iterations was "
+ "reached without successfully preparing the "
+ "image.\n");
+
+ if (storage_required) {
+ printk(KERN_INFO " - We need at least %lu pages of storage "
+ "(ignoring the header), but only have %lu.\n",
+ main_storage_needed(1, 1),
+ main_storage_allocated);
+ set_abort_result(TOI_INSUFFICIENT_STORAGE);
+ }
+
+ if (ram_required) {
+ printk(KERN_INFO " - We need %lu more free pages of low "
+ "memory.\n", ram_required);
+ printk(KERN_INFO " Minimum free : %8d\n", MIN_FREE_RAM);
+ printk(KERN_INFO " + Reqd. by modules : %8lu\n",
+ toi_memory_for_modules(0));
+ printk(KERN_INFO " + 2 * extra allow : %8lu\n",
+ 2 * extra_pd1_pages_allowance);
+ printk(KERN_INFO " - Currently free : %8lu\n",
+ real_nr_free_low_pages());
+ printk(KERN_INFO " - Pages allocd : %8lu\n",
+ extra_pages_allocated);
+ printk(KERN_INFO " : ========\n");
+ printk(KERN_INFO " Still needed : %8lu\n",
+ ram_required);
+
+ /* Print breakdown of memory needed for modules */
+ toi_memory_for_modules(1);
+ set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY);
+ }
+
+ if (high_ps1) {
+ printk(KERN_INFO "- We need to free %lu highmem pageset 1 "
+ "pages.\n", high_ps1);
+ set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY);
+ }
+
+ if (low_ps1) {
+ printk(KERN_INFO " - We need to free %ld lowmem pageset 1 "
+ "pages.\n", low_ps1);
+ set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY);
+ }
+}
+
+static void display_stats(int always, int sub_extra_pd1_allow)
+{
+ char buffer[255];
+ snprintf(buffer, 254,
+ "Free:%lu(%lu). Sets:%lu(%lu),%lu(%lu). "
+ "Nosave:%lu-%lu=%lu. Storage:%lu/%lu(%lu=>%lu). "
+ "Needed:%lu,%lu,%lu(%u,%lu,%lu,%ld) (PS2:%s)\n",
+
+ /* Free */
+ real_nr_free_pages(all_zones_mask),
+ real_nr_free_low_pages(),
+
+ /* Sets */
+ pagedir1.size, pagedir1.size - get_highmem_size(pagedir1),
+ pagedir2.size, pagedir2.size - get_highmem_size(pagedir2),
+
+ /* Nosave */
+ num_nosave, extra_pages_allocated,
+ num_nosave - extra_pages_allocated,
+
+ /* Storage */
+ main_storage_allocated,
+ storage_limit,
+ main_storage_needed(1, sub_extra_pd1_allow),
+ main_storage_needed(1, 1),
+
+ /* Needed */
+ lowpages_ps1_to_free(), highpages_ps1_to_free(),
+ any_to_free(1),
+ MIN_FREE_RAM, toi_memory_for_modules(0),
+ extra_pd1_pages_allowance,
+ image_size_limit,
+
+ need_pageset2() ? "yes" : "no");
+
+ if (always)
+ printk("%s", buffer);
+ else
+ toi_message(TOI_EAT_MEMORY, TOI_MEDIUM, 1, buffer);
+}
+
+/* flag_image_pages
+ *
+ * This routine generates our lists of pages to be stored in each
+ * pageset. Since we store the data using extents, and adding new
+ * extents might allocate a new extent page, this routine may well
+ * be called more than once.
+ */
+static void flag_image_pages(int atomic_copy)
+{
+ int num_free = 0, num_unmodified = 0;
+ unsigned long loop;
+ struct zone *zone;
+
+ pagedir1.size = 0;
+ pagedir2.size = 0;
+
+ set_highmem_size(pagedir1, 0);
+ set_highmem_size(pagedir2, 0);
+
+ num_nosave = 0;
+ toi_trace_index++;
+
+ memory_bm_clear(pageset1_map);
+
+ toi_generate_free_page_map();
+
+ /*
+ * Pages not to be saved are marked Nosave irrespective of being
+ * reserved.
+ */
+ for_each_populated_zone(zone) {
+ int highmem = is_highmem(zone);
+
+ for (loop = 0; loop < zone->spanned_pages; loop++) {
+ unsigned long pfn = zone->zone_start_pfn + loop;
+ struct page *page;
+ int chunk_size;
+
+ if (!pfn_valid(pfn)) {
+ TOI_TRACE_DEBUG(pfn, "_Flag Invalid");
+ continue;
+ }
+
+ chunk_size = toi_size_of_free_region(zone, pfn);
+ if (chunk_size) {
+ unsigned long y;
+ for (y = pfn; y < pfn + chunk_size; y++) {
+ page = pfn_to_page(y);
+ TOI_TRACE_DEBUG(y, "_Flag Free");
+ ClearPagePageset1(page);
+ ClearPagePageset2(page);
+ }
+ num_free += chunk_size;
+ loop += chunk_size - 1;
+ continue;
+ }
+
+ page = pfn_to_page(pfn);
+
+ if (PageNosave(page)) {
+ char *desc = PagePageset1Copy(page) ? "Pageset1Copy" : "NoSave";
+ TOI_TRACE_DEBUG(pfn, "_Flag %s", desc);
+ num_nosave++;
+ continue;
+ }
+
+ page = highmem ? saveable_highmem_page(zone, pfn) :
+ saveable_page(zone, pfn);
+
+ if (!page) {
+ TOI_TRACE_DEBUG(pfn, "_Flag Nosave2");
+ num_nosave++;
+ continue;
+ }
+
+ if (PageTOI_RO(page) && test_result_state(TOI_KEPT_IMAGE)) {
+ TOI_TRACE_DEBUG(pfn, "_Unmodified");
+ num_unmodified++;
+ continue;
+ }
+
+ if (PagePageset2(page)) {
+ pagedir2.size++;
+ TOI_TRACE_DEBUG(pfn, "_Flag PS2");
+ if (PageHighMem(page))
+ inc_highmem_size(pagedir2);
+ else
+ SetPagePageset1Copy(page);
+ if (PageResave(page)) {
+ SetPagePageset1(page);
+ ClearPagePageset1Copy(page);
+ pagedir1.size++;
+ if (PageHighMem(page))
+ inc_highmem_size(pagedir1);
+ }
+ } else {
+ pagedir1.size++;
+ TOI_TRACE_DEBUG(pfn, "_Flag PS1");
+ SetPagePageset1(page);
+ if (PageHighMem(page))
+ inc_highmem_size(pagedir1);
+ }
+ }
+ }
+
+ if (!atomic_copy)
+ toi_message(TOI_EAT_MEMORY, TOI_MEDIUM, 0,
+ "Count data pages: Set1 (%d) + Set2 (%d) + Nosave (%ld)"
+ " + Unmodified (%d) + NumFree (%d) = %d.\n",
+ pagedir1.size, pagedir2.size, num_nosave, num_unmodified,
+ num_free, pagedir1.size + pagedir2.size + num_nosave + num_free);
+}
+
+void toi_recalculate_image_contents(int atomic_copy)
+{
+ memory_bm_clear(pageset1_map);
+ if (!atomic_copy) {
+ unsigned long pfn;
+ memory_bm_position_reset(pageset2_map);
+ for (pfn = memory_bm_next_pfn(pageset2_map, 0);
+ pfn != BM_END_OF_MAP;
+ pfn = memory_bm_next_pfn(pageset2_map, 0))
+ ClearPagePageset1Copy(pfn_to_page(pfn));
+ /* Need to call this before getting pageset1_size! */
+ toi_mark_pages_for_pageset2();
+ }
+ memory_bm_position_reset(pageset2_map);
+ flag_image_pages(atomic_copy);
+
+ if (!atomic_copy) {
+ storage_limit = toiActiveAllocator->storage_available();
+ display_stats(0, 0);
+ }
+}
+
+int try_allocate_extra_memory(void)
+{
+ unsigned long wanted = pagedir1.size + extra_pd1_pages_allowance -
+ get_lowmem_size(pagedir2);
+ if (wanted > extra_pages_allocated) {
+ unsigned long got = toi_allocate_extra_pagedir_memory(wanted);
+ if (got < wanted) {
+ toi_message(TOI_EAT_MEMORY, TOI_LOW, 1,
+ "Wanted %lu extra pages for pageset1, got %lu.\n",
+ wanted, got);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/* update_image
+ *
+ * Allocate [more] memory and storage for the image.
+ */
+static void update_image(int ps2_recalc)
+{
+ unsigned long old_header_req, seek;
+
+ if (try_allocate_extra_memory())
+ return;
+
+ if (ps2_recalc)
+ goto recalc;
+
+ thaw_kernel_threads();
+
+ /*
+ * Allocate remaining storage space, if possible, up to the
+ * maximum we know we'll need. It's okay to allocate the
+ * maximum if the writer is the swapwriter, but
+ * we don't want to grab all available space on an NFS share.
+ * We therefore ignore the expected compression ratio here,
+ * thereby trying to allocate the maximum image size we could
+ * need (assuming compression doesn't expand the image), but
+ * don't complain if we can't get the full amount we're after.
+ */
+
+ do {
+ int result;
+
+ old_header_req = header_storage_needed;
+ toiActiveAllocator->reserve_header_space(header_storage_needed);
+
+ /* How much storage is free with the reservation applied? */
+ storage_limit = toiActiveAllocator->storage_available();
+ seek = min(storage_limit, main_storage_needed(0, 0));
+
+ result = toiActiveAllocator->allocate_storage(seek);
+ if (result)
+ printk("Failed to allocate storage (%d).\n", result);
+
+ main_storage_allocated =
+ toiActiveAllocator->storage_allocated();
+
+ /* Need more header because more storage allocated? */
+ header_storage_needed = get_header_storage_needed();
+
+ } while (header_storage_needed > old_header_req);
+
+ if (freeze_kernel_threads())
+ set_abort_result(TOI_FREEZING_FAILED);
+
+recalc:
+ toi_recalculate_image_contents(0);
+}
+
+/* attempt_to_freeze
+ *
+ * Try to freeze processes.
+ */
+
+static int attempt_to_freeze(void)
+{
+ int result;
+
+ /* Stop processes before checking again */
+ toi_prepare_status(CLEAR_BAR, "Freezing processes & syncing "
+ "filesystems.");
+ result = freeze_processes();
+
+ if (result)
+ set_abort_result(TOI_FREEZING_FAILED);
+
+ result = freeze_kernel_threads();
+
+ if (result)
+ set_abort_result(TOI_FREEZING_FAILED);
+
+ return result;
+}
+
+/* eat_memory
+ *
+ * Try to free some memory, either to meet hard or soft constraints on the image
+ * characteristics.
+ *
+ * Hard constraints:
+ * - Pageset1 must be < half of memory;
+ * - We must have enough memory free at resume time to have pageset1
+ * be able to be loaded in pages that don't conflict with where it has to
+ * be restored.
+ * Soft constraints
+ * - User specified image size limit.
+ */
+static void eat_memory(void)
+{
+ unsigned long amount_wanted = 0;
+ int did_eat_memory = 0;
+
+ /*
+ * Note that if we have enough storage space and enough free memory, we
+ * may exit without eating anything. We give up when the last 10
+ * iterations ate no extra pages because we're not going to get much
+ * more anyway, but the few pages we get will take a lot of time.
+ *
+ * We freeze processes before beginning, and then unfreeze them if we
+ * need to eat memory until we think we have enough. If our attempts
+ * to freeze fail, we give up and abort.
+ */
+
+ amount_wanted = amount_needed(1);
+
+ switch (image_size_limit) {
+ case -1: /* Don't eat any memory */
+ if (amount_wanted > 0) {
+ set_abort_result(TOI_WOULD_EAT_MEMORY);
+ return;
+ }
+ break;
+ case -2: /* Free caches only */
+ drop_pagecache();
+ toi_recalculate_image_contents(0);
+ amount_wanted = amount_needed(1);
+ break;
+ default:
+ break;
+ }
+
+ if (amount_wanted > 0 && !test_result_state(TOI_ABORTED) &&
+ image_size_limit != -1) {
+ unsigned long request = amount_wanted;
+ unsigned long high_req = max(highpages_ps1_to_free(),
+ any_to_free(1));
+ unsigned long low_req = lowpages_ps1_to_free();
+ unsigned long got = 0;
+
+ toi_prepare_status(CLEAR_BAR,
+ "Seeking to free %ldMB of memory.",
+ MB(amount_wanted));
+
+ thaw_kernel_threads();
+
+ /*
+ * Ask for too many because shrink_memory_mask doesn't
+ * currently return enough most of the time.
+ */
+
+ if (low_req)
+ got = shrink_memory_mask(low_req, GFP_KERNEL);
+ if (high_req)
+ shrink_memory_mask(high_req - got, GFP_HIGHUSER);
+
+ did_eat_memory = 1;
+
+ toi_recalculate_image_contents(0);
+
+ amount_wanted = amount_needed(1);
+
+ printk(KERN_DEBUG "Asked shrink_memory_mask for %ld low pages &"
+ " %ld pages from anywhere, got %ld.\n",
+ high_req, low_req,
+ request - amount_wanted);
+
+ toi_cond_pause(0, NULL);
+
+ if (freeze_kernel_threads())
+ set_abort_result(TOI_FREEZING_FAILED);
+ }
+
+ if (did_eat_memory)
+ toi_recalculate_image_contents(0);
+}
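+
+/*
+ * Note on image_size_limit (illustrative, assuming 4096-byte pages): a value
+ * of -1 means never eat memory, -2 means only drop the page cache, and a
+ * positive value N is a soft cap of N MB - any_to_free() compares the image
+ * against N << 8 pages, so a limit of 500 becomes 128000 pages, roughly
+ * 500 MB.
+ */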
+
+/* toi_prepare_image
+ *
+ * Entry point to the whole image preparation section.
+ *
+ * We do four things:
+ * - Freeze processes;
+ * - Ensure image size constraints are met;
+ * - Complete all the preparation for saving the image,
+ * including allocation of storage. The only memory
+ * that should be needed when we're finished is that
+ * for actually storing the image (and we know how
+ * much is needed for that because the modules tell
+ * us).
+ * - Make sure that all dirty buffers are written out.
+ */
+#define MAX_TRIES 2
+int toi_prepare_image(void)
+{
+ int result = 1, tries = 1;
+
+ main_storage_allocated = 0;
+ no_ps2_needed = 0;
+
+ if (attempt_to_freeze())
+ return 1;
+
+ lock_device_hotplug();
+ set_toi_state(TOI_DEVICE_HOTPLUG_LOCKED);
+
+ if (!extra_pd1_pages_allowance)
+ get_extra_pd1_allowance();
+
+ storage_limit = toiActiveAllocator->storage_available();
+
+ if (!storage_limit) {
+ printk(KERN_INFO "No storage available. Didn't try to prepare "
+ "an image.\n");
+ display_failure_reason(0);
+ set_abort_result(TOI_NOSTORAGE_AVAILABLE);
+ return 1;
+ }
+
+ if (build_attention_list()) {
+ abort_hibernate(TOI_UNABLE_TO_PREPARE_IMAGE,
+ "Unable to successfully prepare the image.\n");
+ return 1;
+ }
+
+ toi_recalculate_image_contents(0);
+
+ do {
+ toi_prepare_status(CLEAR_BAR,
+ "Preparing Image. Try %d.", tries);
+
+ eat_memory();
+
+ if (test_result_state(TOI_ABORTED))
+ break;
+
+ update_image(0);
+
+ tries++;
+
+ } while (image_not_ready(1) && tries <= MAX_TRIES &&
+ !test_result_state(TOI_ABORTED));
+
+ result = image_not_ready(0);
+
+ /* TODO: Handle case where need to remove existing image and resave
+ * instead of adding to incremental image. */
+
+ if (!test_result_state(TOI_ABORTED)) {
+ if (result) {
+ display_stats(1, 0);
+ display_failure_reason(tries > MAX_TRIES);
+ abort_hibernate(TOI_UNABLE_TO_PREPARE_IMAGE,
+ "Unable to successfully prepare the image.\n");
+ } else {
+ /* Pageset 2 needed? */
+ if (!need_pageset2() &&
+ test_action_state(TOI_NO_PS2_IF_UNNEEDED)) {
+ no_ps2_needed = 1;
+ toi_recalculate_image_contents(0);
+ update_image(1);
+ }
+
+ toi_cond_pause(1, "Image preparation complete.");
+ }
+ }
+
+ return result ? result : allocate_checksum_pages();
+}
diff --git a/kernel/power/tuxonice_prepare_image.h b/kernel/power/tuxonice_prepare_image.h
new file mode 100644
index 000000000..af6769ee2
--- /dev/null
+++ b/kernel/power/tuxonice_prepare_image.h
@@ -0,0 +1,38 @@
+/*
+ * kernel/power/tuxonice_prepare_image.h
+ *
+ * Copyright (C) 2003-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ */
+
+#include <asm/sections.h>
+
+extern int toi_prepare_image(void);
+extern void toi_recalculate_image_contents(int atomic_copy);
+extern unsigned long real_nr_free_pages(unsigned long zone_idx_mask);
+extern long image_size_limit;
+extern void toi_free_extra_pagedir_memory(void);
+extern unsigned long extra_pd1_pages_allowance;
+extern void free_attention_list(void);
+
+#define MIN_FREE_RAM 100
+#define MIN_EXTRA_PAGES_ALLOWANCE 500
+
+#define all_zones_mask ((unsigned long) ((1 << MAX_NR_ZONES) - 1))
+#ifdef CONFIG_HIGHMEM
+#define real_nr_free_high_pages() (real_nr_free_pages(1 << ZONE_HIGHMEM))
+#define real_nr_free_low_pages() (real_nr_free_pages(all_zones_mask - \
+ (1 << ZONE_HIGHMEM)))
+#else
+#define real_nr_free_high_pages() (0)
+#define real_nr_free_low_pages() (real_nr_free_pages(all_zones_mask))
+
+/* For eat_memory function */
+#define ZONE_HIGHMEM (MAX_NR_ZONES + 1)
+#endif
+
+unsigned long get_header_storage_needed(void);
+unsigned long any_to_free(int use_image_size_limit);
+int try_allocate_extra_memory(void);
diff --git a/kernel/power/tuxonice_prune.c b/kernel/power/tuxonice_prune.c
new file mode 100644
index 000000000..710e48dee
--- /dev/null
+++ b/kernel/power/tuxonice_prune.c
@@ -0,0 +1,406 @@
+/*
+ * kernel/power/tuxonice_prune.c
+ *
+ * Copyright (C) 2012 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * This file implements a TuxOnIce module that seeks to prune the
+ * amount of data written to disk. It builds a table of hashes
+ * of the uncompressed data, and writes the pfn of the previous page
+ * with the same contents instead of repeating the data when a match
+ * is found.
+ */
+
+#include <linux/suspend.h>
+#include <linux/highmem.h>
+#include <linux/vmalloc.h>
+#include <linux/crypto.h>
+#include <linux/scatterlist.h>
+#include <crypto/hash.h>
+
+#include "tuxonice_builtin.h"
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_io.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_alloc.h"
+
+/*
+ * We never write a page bigger than PAGE_SIZE, so use a large number
+ * to indicate that data is a PFN.
+ */
+#define PRUNE_DATA_IS_PFN (PAGE_SIZE + 100)
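+
+/*
+ * Example (assuming 4096-byte pages): a stored length of 4196 can never be
+ * real page data, so a reader seeing PRUNE_DATA_IS_PFN as the length knows
+ * the payload is a reference to the pfn of an earlier, identical page rather
+ * than the page contents themselves.
+ */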
+
+static unsigned long toi_pruned_pages;
+
+static struct toi_module_ops toi_prune_ops;
+static struct toi_module_ops *next_driver;
+
+static char toi_prune_hash_algo_name[32] = "sha1";
+
+static DEFINE_MUTEX(stats_lock);
+
+struct cpu_context {
+ struct shash_desc desc;
+ char *digest;
+};
+
+#define OUT_BUF_SIZE (2 * PAGE_SIZE)
+
+static DEFINE_PER_CPU(struct cpu_context, contexts);
+
+/*
+ * toi_crypto_prepare
+ *
+ * Prepare to do some work by allocating buffers and transforms.
+ */
+static int toi_prune_crypto_prepare(void)
+{
+ int cpu, ret, digestsize = 0;
+
+ if (!*toi_prune_hash_algo_name) {
+ printk(KERN_INFO "TuxOnIce: Pruning enabled but no "
+ "hash algorithm set.\n");
+ return 1;
+ }
+
+ for_each_online_cpu(cpu) {
+ struct cpu_context *this = &per_cpu(contexts, cpu);
+ this->desc.tfm = crypto_alloc_shash(toi_prune_hash_algo_name, 0, 0);
+ if (IS_ERR(this->desc.tfm)) {
+ printk(KERN_INFO "TuxOnIce: Failed to allocate the "
+ "%s prune hash algorithm.\n",
+ toi_prune_hash_algo_name);
+ this->desc.tfm = NULL;
+ return 1;
+ }
+
+ if (!digestsize)
+ digestsize = crypto_shash_digestsize(this->desc.tfm);
+
+ this->digest = kmalloc(digestsize, GFP_KERNEL);
+ if (!this->digest) {
+ printk(KERN_INFO "TuxOnIce: Failed to allocate space "
+ "for digest output.\n");
+ crypto_free_shash(this->desc.tfm);
+ this->desc.tfm = NULL;
+ return 1;
+ }
+
+ this->desc.flags = 0;
+
+ ret = crypto_shash_init(&this->desc);
+ if (ret < 0) {
+ printk(KERN_INFO "TuxOnIce: Failed to initialise the "
+ "%s prune hash algorithm.\n",
+ toi_prune_hash_algo_name);
+ kfree(this->digest);
+ this->digest = NULL;
+ crypto_free_shash(this->desc.tfm);
+ this->desc.tfm = NULL;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int toi_prune_rw_cleanup(int writing)
+{
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ struct cpu_context *this = &per_cpu(contexts, cpu);
+ if (this->desc.tfm) {
+ crypto_free_shash(this->desc.tfm);
+ this->desc.tfm = NULL;
+ }
+
+ if (this->digest) {
+ kfree(this->digest);
+ this->digest = NULL;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * toi_prune_init
+ */
+
+static int toi_prune_init(int toi_or_resume)
+{
+ if (!toi_or_resume)
+ return 0;
+
+ toi_pruned_pages = 0;
+
+ next_driver = toi_get_next_filter(&toi_prune_ops);
+
+ return next_driver ? 0 : -ECHILD;
+}
+
+/*
+ * toi_prune_rw_init()
+ */
+
+static int toi_prune_rw_init(int rw, int stream_number)
+{
+ if (toi_prune_crypto_prepare()) {
+ printk(KERN_ERR "Failed to initialise prune "
+ "algorithm.\n");
+ if (rw == READ) {
+ printk(KERN_INFO "Unable to read the image.\n");
+ return -ENODEV;
+ } else {
+ printk(KERN_INFO "Continuing without "
+ "pruning the image.\n");
+ toi_prune_ops.enabled = 0;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * toi_prune_write_page()
+ *
+ * Hash a page of data so that duplicate pages can be detected, then pass
+ * the page on to the next module in the pipeline.
+ *
+ * Buffer_page: Pointer to a buffer of size PAGE_SIZE, containing
+ * data to be checked.
+ *
+ * Returns: 0 on success. Otherwise the error returned by later
+ * modules, or -ECHILD if we have a broken pipeline.
+ */
+static int toi_prune_write_page(unsigned long index, int buf_type,
+ void *buffer_page, unsigned int buf_size)
+{
+ int ret = 0, cpu = smp_processor_id();
+ struct cpu_context *ctx = &per_cpu(contexts, cpu);
+ u8 *output_buffer = buffer_page;
+ int output_len = buf_size;
+ int out_buf_type = buf_type;
+ void *buffer_start;
+
+ if (ctx->desc.tfm) {
+ /*
+ * Hash the page so duplicate contents can be counted; the
+ * data itself is passed on unchanged.
+ */
+ buffer_start = TOI_MAP(buf_type, buffer_page);
+
+ ret = crypto_shash_digest(&ctx->desc, buffer_start, buf_size,
+ ctx->digest);
+ if (ret) {
+ printk(KERN_INFO "TuxOnIce: Failed to calculate digest (%d).\n", ret);
+ } else {
+ mutex_lock(&stats_lock);
+ toi_pruned_pages++;
+ mutex_unlock(&stats_lock);
+ }
+
+ TOI_UNMAP(buf_type, buffer_page);
+ }
+
+ ret = next_driver->write_page(index, out_buf_type, output_buffer,
+ output_len);
+
+ return ret;
+}
+
+/*
+ * toi_prune_read_page()
+ * @buffer_page: struct page *. Pointer to a buffer of size PAGE_SIZE.
+ *
+ * Retrieve data from later modules or from a previously loaded page and
+ * fill the input buffer.
+ * Zero if successful. Error condition from me or from downstream on failure.
+ */
+static int toi_prune_read_page(unsigned long *index, int buf_type,
+ void *buffer_page, unsigned int *buf_size)
+{
+ int ret, cpu = smp_processor_id();
+ unsigned int len;
+ struct cpu_context *ctx = &per_cpu(contexts, cpu);
+
+ if (!ctx->desc.tfm)
+ return next_driver->read_page(index, TOI_PAGE, buffer_page,
+ buf_size);
+
+ /*
+ * All our reads must be synchronous - we can't handle
+ * data that hasn't been read yet.
+ */
+
+ ret = next_driver->read_page(index, buf_type, buffer_page, &len);
+ *buf_size = len;
+
+ /*
+ * A length of PRUNE_DATA_IS_PFN would mean the payload is a reference
+ * to an earlier, identical page rather than page data; reconstructing
+ * it would require copying that page's contents back in here.
+ */
+
+ return ret;
+}
+
+/*
+ * toi_prune_print_debug_stats
+ * @buffer: Pointer to a buffer into which the debug info will be printed.
+ * @size: Size of the buffer.
+ *
+ * Print information to be recorded for debugging purposes into a buffer.
+ * Returns: Number of characters written to the buffer.
+ */
+
+static int toi_prune_print_debug_stats(char *buffer, int size)
+{
+ int len;
+
+ /* Output the hash algorithm and the number of pages pruned. */
+ if (*toi_prune_hash_algo_name)
+ len = scnprintf(buffer, size, "- Prune hash algorithm is '%s'.\n",
+ toi_prune_hash_algo_name);
+ else
+ len = scnprintf(buffer, size, "- Prune hash algorithm is not set.\n");
+
+ if (toi_pruned_pages)
+ len += scnprintf(buffer+len, size - len, " Pruned "
+ "%lu pages.\n",
+ toi_pruned_pages);
+ return len;
+}
+
+/*
+ * toi_prune_memory_needed
+ *
+ * Tell the caller how much memory we need to operate during hibernate/resume.
+ * Returns: Int. Maximum number of bytes of memory required for
+ * operation.
+ */
+static int toi_prune_memory_needed(void)
+{
+ return 2 * PAGE_SIZE;
+}
+
+static int toi_prune_storage_needed(void)
+{
+ return 2 * sizeof(unsigned long) + 2 * sizeof(int) +
+ strlen(toi_prune_hash_algo_name) + 1;
+}
+
+/*
+ * toi_prune_save_config_info
+ * @buffer: Pointer to a buffer of size PAGE_SIZE.
+ *
+ * Save information needed when reloading the image at resume time.
+ * Returns: Number of bytes used for saving our data.
+ */
+static int toi_prune_save_config_info(char *buffer)
+{
+ int len = strlen(toi_prune_hash_algo_name) + 1, offset = 0;
+
+ *((unsigned long *) buffer) = toi_pruned_pages;
+ offset += sizeof(unsigned long);
+ *((int *) (buffer + offset)) = len;
+ offset += sizeof(int);
+ strncpy(buffer + offset, toi_prune_hash_algo_name, len);
+ return offset + len;
+}
+
+/* toi_prune_load_config_info
+ * @buffer: Pointer to the start of the data.
+ * @size: Number of bytes that were saved.
+ *
+ * Description: Reload information needed for passing back to the
+ * resumed kernel.
+ */
+static void toi_prune_load_config_info(char *buffer, int size)
+{
+ int len, offset = 0;
+
+ toi_pruned_pages = *((unsigned long *) buffer);
+ offset += sizeof(unsigned long);
+ len = *((int *) (buffer + offset));
+ offset += sizeof(int);
+ strncpy(toi_prune_hash_algo_name, buffer + offset, len);
+}
+
+static void toi_prune_pre_atomic_restore(struct toi_boot_kernel_data *bkd)
+{
+ bkd->pruned_pages = toi_pruned_pages;
+}
+
+static void toi_prune_post_atomic_restore(struct toi_boot_kernel_data *bkd)
+{
+ toi_pruned_pages = bkd->pruned_pages;
+}
+
+/*
+ * toi_prune_expected_ratio
+ *
+ * Description: Returns the expected ratio between data passed into this module
+ * and the amount of data output when writing.
+ * Returns: 100 - we have no idea how many pages will be pruned.
+ */
+
+static int toi_prune_expected_ratio(void)
+{
+ return 100;
+}
+
+/*
+ * data for our sysfs entries.
+ */
+static struct toi_sysfs_data sysfs_params[] = {
+ SYSFS_INT("enabled", SYSFS_RW, &toi_prune_ops.enabled, 0, 1, 0,
+ NULL),
+ SYSFS_STRING("algorithm", SYSFS_RW, toi_prune_hash_algo_name, 31, 0, NULL),
+};
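+
+/*
+ * Usage sketch (paths assume the usual /sys/power/tuxonice location and a
+ * hypothetical choice of algorithm):
+ *
+ *   echo 1 > /sys/power/tuxonice/prune/enabled
+ *   echo md5 > /sys/power/tuxonice/prune/algorithm
+ *
+ * Any shash algorithm known to the crypto API can be named; the default is
+ * "sha1".
+ */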
+
+/*
+ * Ops structure.
+ */
+static struct toi_module_ops toi_prune_ops = {
+ .type = FILTER_MODULE,
+ .name = "prune",
+ .directory = "prune",
+ .module = THIS_MODULE,
+ .initialise = toi_prune_init,
+ .memory_needed = toi_prune_memory_needed,
+ .print_debug_info = toi_prune_print_debug_stats,
+ .save_config_info = toi_prune_save_config_info,
+ .load_config_info = toi_prune_load_config_info,
+ .storage_needed = toi_prune_storage_needed,
+ .expected_compression = toi_prune_expected_ratio,
+
+ .pre_atomic_restore = toi_prune_pre_atomic_restore,
+ .post_atomic_restore = toi_prune_post_atomic_restore,
+
+ .rw_init = toi_prune_rw_init,
+ .rw_cleanup = toi_prune_rw_cleanup,
+
+ .write_page = toi_prune_write_page,
+ .read_page = toi_prune_read_page,
+
+ .sysfs_data = sysfs_params,
+ .num_sysfs_entries = sizeof(sysfs_params) /
+ sizeof(struct toi_sysfs_data),
+};
+
+/* ---- Registration ---- */
+
+static __init int toi_prune_load(void)
+{
+ return toi_register_module(&toi_prune_ops);
+}
+
+late_initcall(toi_prune_load);
diff --git a/kernel/power/tuxonice_storage.c b/kernel/power/tuxonice_storage.c
new file mode 100644
index 000000000..e99f6e24f
--- /dev/null
+++ b/kernel/power/tuxonice_storage.c
@@ -0,0 +1,282 @@
+/*
+ * kernel/power/tuxonice_storage.c
+ *
+ * Copyright (C) 2005-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Routines for talking to a userspace program that manages storage.
+ *
+ * The kernel side:
+ * - starts the userspace program;
+ * - sends messages telling it when to open and close the connection;
+ * - tells it when to quit;
+ *
+ * The user space side:
+ * - passes messages regarding status;
+ *
+ */
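+
+/*
+ * A typical exchange (sketch): at the start of a cycle toi_prepare_usm()
+ * launches the helper and toi_activate_storage() sends USM_MSG_CONNECT;
+ * the helper replies with USM_MSG_SUCCESS (or USM_MSG_FAILED) once the
+ * storage is ready. The reverse happens via USM_MSG_DISCONNECT when the
+ * cycle ends, and toi_cleanup_usm() shuts the helper down.
+ */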
+
+#include <linux/suspend.h>
+#include <linux/freezer.h>
+
+#include "tuxonice_sysfs.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_netlink.h"
+#include "tuxonice_storage.h"
+#include "tuxonice_ui.h"
+
+static struct user_helper_data usm_helper_data;
+static struct toi_module_ops usm_ops;
+static int message_received, usm_prepare_count;
+static int storage_manager_last_action, storage_manager_action;
+
+static int usm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+ int type;
+ int *data;
+
+ type = nlh->nlmsg_type;
+
+ /* A control message: ignore them */
+ if (type < NETLINK_MSG_BASE)
+ return 0;
+
+ /* Unknown message: reply with EINVAL */
+ if (type >= USM_MSG_MAX)
+ return -EINVAL;
+
+ /* All operations require privileges, even GET */
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ /* Only allow one task to receive NOFREEZE privileges */
+ if (type == NETLINK_MSG_NOFREEZE_ME && usm_helper_data.pid != -1)
+ return -EBUSY;
+
+ data = (int *) NLMSG_DATA(nlh);
+
+ switch (type) {
+ case USM_MSG_SUCCESS:
+ case USM_MSG_FAILED:
+ message_received = type;
+ complete(&usm_helper_data.wait_for_process);
+ break;
+ default:
+ printk(KERN_INFO "Storage manager doesn't recognise "
+ "message %d.\n", type);
+ }
+
+ return 1;
+}
+
+#ifdef CONFIG_NET
+static int activations;
+
+int toi_activate_storage(int force)
+{
+ int tries = 1;
+
+ if (usm_helper_data.pid == -1 || !usm_ops.enabled)
+ return 0;
+
+ message_received = 0;
+ activations++;
+
+ if (activations > 1 && !force)
+ return 0;
+
+ while ((!message_received || message_received == USM_MSG_FAILED) &&
+ tries < 2) {
+ toi_prepare_status(DONT_CLEAR_BAR, "Activate storage attempt "
+ "%d.\n", tries);
+
+ init_completion(&usm_helper_data.wait_for_process);
+
+ toi_send_netlink_message(&usm_helper_data,
+ USM_MSG_CONNECT,
+ NULL, 0);
+
+ /* Wait 2 seconds for the userspace process to make contact */
+ wait_for_completion_timeout(&usm_helper_data.wait_for_process,
+ 2*HZ);
+
+ tries++;
+ }
+
+ return 0;
+}
+
+int toi_deactivate_storage(int force)
+{
+ if (usm_helper_data.pid == -1 || !usm_ops.enabled)
+ return 0;
+
+ message_received = 0;
+ activations--;
+
+ if (activations && !force)
+ return 0;
+
+ init_completion(&usm_helper_data.wait_for_process);
+
+ toi_send_netlink_message(&usm_helper_data,
+ USM_MSG_DISCONNECT,
+ NULL, 0);
+
+ wait_for_completion_timeout(&usm_helper_data.wait_for_process, 2*HZ);
+
+ if (!message_received || message_received == USM_MSG_FAILED) {
+ printk(KERN_INFO "Returning failure disconnecting storage.\n");
+ return 1;
+ }
+
+ return 0;
+}
+#endif
+
+static void storage_manager_simulate(void)
+{
+ printk(KERN_INFO "--- Storage manager simulate ---\n");
+ toi_prepare_usm();
+ schedule();
+ printk(KERN_INFO "--- Activate storage 1 ---\n");
+ toi_activate_storage(1);
+ schedule();
+ printk(KERN_INFO "--- Deactivate storage 1 ---\n");
+ toi_deactivate_storage(1);
+ schedule();
+ printk(KERN_INFO "--- Cleanup usm ---\n");
+ toi_cleanup_usm();
+ schedule();
+ printk(KERN_INFO "--- Storage manager simulate ends ---\n");
+}
+
+static int usm_storage_needed(void)
+{
+ return sizeof(int) + strlen(usm_helper_data.program) + 1;
+}
+
+static int usm_save_config_info(char *buf)
+{
+ int len = strlen(usm_helper_data.program);
+
+ *((int *) buf) = len;
+ memcpy(buf + sizeof(int), usm_helper_data.program, len + 1);
+ return sizeof(int) + len + 1;
+}
+
+static void usm_load_config_info(char *buf, int size)
+{
+ /* Don't load the saved path if one has already been set */
+ if (usm_helper_data.program[0])
+ return;
+
+ memcpy(usm_helper_data.program, buf + sizeof(int), *((int *) buf));
+}
+
+static int usm_memory_needed(void)
+{
+ /* ball park figure of 32 pages */
+ return 32 * PAGE_SIZE;
+}
+
+/* toi_prepare_usm
+ */
+int toi_prepare_usm(void)
+{
+ usm_prepare_count++;
+
+ if (usm_prepare_count > 1 || !usm_ops.enabled)
+ return 0;
+
+ usm_helper_data.pid = -1;
+
+ if (!*usm_helper_data.program)
+ return 0;
+
+ toi_netlink_setup(&usm_helper_data);
+
+ if (usm_helper_data.pid == -1)
+ printk(KERN_INFO "TuxOnIce Storage Manager wanted, but couldn't"
+ " start it.\n");
+
+ toi_activate_storage(0);
+
+ return usm_helper_data.pid != -1;
+}
+
+void toi_cleanup_usm(void)
+{
+ usm_prepare_count--;
+
+ if (usm_helper_data.pid > -1 && !usm_prepare_count) {
+ toi_deactivate_storage(0);
+ toi_netlink_close(&usm_helper_data);
+ }
+}
+
+static void storage_manager_activate(void)
+{
+ if (storage_manager_action == storage_manager_last_action)
+ return;
+
+ if (storage_manager_action)
+ toi_prepare_usm();
+ else
+ toi_cleanup_usm();
+
+ storage_manager_last_action = storage_manager_action;
+}
+
+/*
+ * User interface specific /sys/power/tuxonice entries.
+ */
+
+static struct toi_sysfs_data sysfs_params[] = {
+ SYSFS_NONE("simulate_atomic_copy", storage_manager_simulate),
+ SYSFS_INT("enabled", SYSFS_RW, &usm_ops.enabled, 0, 1, 0, NULL),
+ SYSFS_STRING("program", SYSFS_RW, usm_helper_data.program, 254, 0,
+ NULL),
+ SYSFS_INT("activate_storage", SYSFS_RW , &storage_manager_action, 0, 1,
+ 0, storage_manager_activate)
+};
+
+static struct toi_module_ops usm_ops = {
+ .type = MISC_MODULE,
+ .name = "usm",
+ .directory = "storage_manager",
+ .module = THIS_MODULE,
+ .storage_needed = usm_storage_needed,
+ .save_config_info = usm_save_config_info,
+ .load_config_info = usm_load_config_info,
+ .memory_needed = usm_memory_needed,
+
+ .sysfs_data = sysfs_params,
+ .num_sysfs_entries = sizeof(sysfs_params) /
+ sizeof(struct toi_sysfs_data),
+};
+
+/* toi_usm_init
+ * Description: Boot time initialisation for the userspace storage manager.
+ */
+int toi_usm_init(void)
+{
+ usm_helper_data.nl = NULL;
+ usm_helper_data.program[0] = '\0';
+ usm_helper_data.pid = -1;
+ usm_helper_data.skb_size = 0;
+ usm_helper_data.pool_limit = 6;
+ usm_helper_data.netlink_id = NETLINK_TOI_USM;
+ usm_helper_data.name = "userspace storage manager";
+ usm_helper_data.rcv_msg = usm_user_rcv_msg;
+ usm_helper_data.interface_version = 2;
+ usm_helper_data.must_init = 0;
+ init_completion(&usm_helper_data.wait_for_process);
+
+ return toi_register_module(&usm_ops);
+}
+
+void toi_usm_exit(void)
+{
+ toi_netlink_close_complete(&usm_helper_data);
+ toi_unregister_module(&usm_ops);
+}
diff --git a/kernel/power/tuxonice_storage.h b/kernel/power/tuxonice_storage.h
new file mode 100644
index 000000000..1ed9ab156
--- /dev/null
+++ b/kernel/power/tuxonice_storage.h
@@ -0,0 +1,45 @@
+/*
+ * kernel/power/tuxonice_storage.h
+ *
+ * Copyright (C) 2005-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ */
+
+#ifdef CONFIG_NET
+int toi_prepare_usm(void);
+void toi_cleanup_usm(void);
+
+int toi_activate_storage(int force);
+int toi_deactivate_storage(int force);
+extern int toi_usm_init(void);
+extern void toi_usm_exit(void);
+#else
+static inline int toi_usm_init(void) { return 0; }
+static inline void toi_usm_exit(void) { }
+
+static inline int toi_activate_storage(int force)
+{
+ return 0;
+}
+
+static inline int toi_deactivate_storage(int force)
+{
+ return 0;
+}
+
+static inline int toi_prepare_usm(void) { return 0; }
+static inline void toi_cleanup_usm(void) { }
+#endif
+
+enum {
+ USM_MSG_BASE = 0x10,
+
+ /* Kernel -> Userspace */
+ USM_MSG_CONNECT = 0x30,
+ USM_MSG_DISCONNECT = 0x31,
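+
+ /* Userspace -> Kernel */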
+ USM_MSG_SUCCESS = 0x40,
+ USM_MSG_FAILED = 0x41,
+
+ USM_MSG_MAX,
+};
diff --git a/kernel/power/tuxonice_swap.c b/kernel/power/tuxonice_swap.c
new file mode 100644
index 000000000..ce3215033
--- /dev/null
+++ b/kernel/power/tuxonice_swap.c
@@ -0,0 +1,474 @@
+/*
+ * kernel/power/tuxonice_swap.c
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * Distributed under GPLv2.
+ *
+ * This file encapsulates functions for usage of swap space as a
+ * backing store.
+ */
+
+#include <linux/suspend.h>
+#include <linux/blkdev.h>
+#include <linux/swapops.h>
+#include <linux/swap.h>
+#include <linux/syscalls.h>
+#include <linux/fs_uuid.h>
+
+#include "tuxonice.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_io.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_extent.h"
+#include "tuxonice_bio.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_builtin.h"
+
+static struct toi_module_ops toi_swapops;
+
+/* For swapfile automatically swapon/off'd. */
+static char swapfilename[255] = "";
+static int toi_swapon_status;
+
+/* Swap Pages */
+static unsigned long swap_allocated;
+
+static struct sysinfo swapinfo;
+
+static int is_ram_backed(struct swap_info_struct *si)
+{
+ if (!strncmp(si->bdev->bd_disk->disk_name, "ram", 3) ||
+ !strncmp(si->bdev->bd_disk->disk_name, "zram", 4))
+ return 1;
+
+ return 0;
+}
+
+/**
+ * enable_swapfile: Swapon the user specified swapfile prior to hibernating.
+ *
+ * Activate the given swapfile if it wasn't already enabled. Remember whether
+ * we really did swapon it for swapoffing later.
+ */
+static void enable_swapfile(void)
+{
+ int activateswapresult = -EINVAL;
+
+ if (swapfilename[0]) {
+ /* Attempt to swap on with maximum priority */
+ activateswapresult = sys_swapon(swapfilename, 0xFFFF);
+ if (activateswapresult && activateswapresult != -EBUSY)
+ printk(KERN_ERR "TuxOnIce: The swapfile/partition "
+ "specified by /sys/power/tuxonice/swap/swapfile"
+ " (%s) could not be turned on (error %d). "
+ "Attempting to continue.\n",
+ swapfilename, activateswapresult);
+ if (!activateswapresult)
+ toi_swapon_status = 1;
+ }
+}
+
+/**
+ * disable_swapfile: Swapoff any file swaponed at the start of the cycle.
+ *
+ * If we did successfully swapon a file at the start of the cycle, swapoff
+ * it now (finishing up).
+ */
+static void disable_swapfile(void)
+{
+ if (!toi_swapon_status)
+ return;
+
+ sys_swapoff(swapfilename);
+ toi_swapon_status = 0;
+}
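+
+/*
+ * Usage sketch (device name is hypothetical): writing a swap device or file
+ * to the "swapfilename" sysfs entry makes TuxOnIce swapon it automatically
+ * at the start of a cycle and swapoff it again afterwards, e.g.
+ *
+ *   echo /dev/sdb2 > /sys/power/tuxonice/swap/swapfilename
+ */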
+
+static int add_blocks_to_extent_chain(struct toi_bdev_info *chain,
+ unsigned long start, unsigned long end)
+{
+ if (test_action_state(TOI_TEST_BIO))
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Adding extent %lu-%lu to "
+ "chain %p.", start << chain->bmap_shift,
+ end << chain->bmap_shift, chain);
+
+ return toi_add_to_extent_chain(&chain->blocks, start, end);
+}
+
+
+static int get_main_pool_phys_params(struct toi_bdev_info *chain)
+{
+ struct hibernate_extent *extentpointer = NULL;
+ unsigned long address, extent_min = 0, extent_max = 0;
+ int empty = 1;
+
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "get main pool phys params for "
+ "chain %d.", chain->allocator_index);
+
+ if (!chain->allocations.first)
+ return 0;
+
+ if (chain->blocks.first)
+ toi_put_extent_chain(&chain->blocks);
+
+ toi_extent_for_each(&chain->allocations, extentpointer, address) {
+ swp_entry_t swap_address = (swp_entry_t) { address };
+ struct block_device *bdev;
+ sector_t new_sector = map_swap_entry(swap_address, &bdev);
+
+ if (empty) {
+ empty = 0;
+ extent_min = extent_max = new_sector;
+ continue;
+ }
+
+ if (new_sector == extent_max + 1) {
+ extent_max++;
+ continue;
+ }
+
+ if (add_blocks_to_extent_chain(chain, extent_min, extent_max)) {
+ printk(KERN_ERR "Out of memory while making block "
+ "chains.\n");
+ return -ENOMEM;
+ }
+
+ extent_min = new_sector;
+ extent_max = new_sector;
+ }
+
+ if (!empty &&
+ add_blocks_to_extent_chain(chain, extent_min, extent_max)) {
+ printk(KERN_ERR "Out of memory while making block chains.\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/*
+ * Like si_swapinfo, except that we don't include ram backed swap (compcache!)
+ * and don't need to use the spinlocks (userspace is stopped when this
+ * function is called).
+ */
+void si_swapinfo_no_compcache(void)
+{
+ unsigned int i;
+
+ si_swapinfo(&swapinfo);
+ swapinfo.freeswap = 0;
+ swapinfo.totalswap = 0;
+
+ for (i = 0; i < MAX_SWAPFILES; i++) {
+ struct swap_info_struct *si = get_swap_info_struct(i);
+ if (si && (si->flags & SWP_WRITEOK) && !is_ram_backed(si)) {
+ swapinfo.totalswap += si->inuse_pages;
+ swapinfo.freeswap += si->pages - si->inuse_pages;
+ }
+ }
+}
+/*
+ * We can't just remember the value from allocation time, because other
+ * processes might have allocated swap in the mean time.
+ */
+static unsigned long toi_swap_storage_available(void)
+{
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "In toi_swap_storage_available.");
+ si_swapinfo_no_compcache();
+ return swapinfo.freeswap + swap_allocated;
+}
+
+static int toi_swap_initialise(int starting_cycle)
+{
+ if (!starting_cycle)
+ return 0;
+
+ enable_swapfile();
+ return 0;
+}
+
+static void toi_swap_cleanup(int ending_cycle)
+{
+ if (!ending_cycle)
+ return;
+
+ disable_swapfile();
+}
+
+static void toi_swap_free_storage(struct toi_bdev_info *chain)
+{
+ /* Free swap entries */
+ struct hibernate_extent *extentpointer;
+ unsigned long extentvalue;
+
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Freeing storage for chain %p.",
+ chain);
+
+ swap_allocated -= chain->allocations.size;
+ toi_extent_for_each(&chain->allocations, extentpointer, extentvalue)
+ swap_free((swp_entry_t) { extentvalue });
+
+ toi_put_extent_chain(&chain->allocations);
+}
+
+static void free_swap_range(unsigned long min, unsigned long max)
+{
+ unsigned long j;
+
+ for (j = min; j <= max; j++)
+ swap_free((swp_entry_t) { j });
+ swap_allocated -= (max - min + 1);
+}
+
+/*
+ * Allocation of a single swap type. Swap priorities are handled at the higher
+ * level.
+ */
+static int toi_swap_allocate_storage(struct toi_bdev_info *chain,
+ unsigned long request)
+{
+ unsigned long gotten = 0;
+
+ toi_message(TOI_IO, TOI_VERBOSE, 0, " Swap allocate storage: Asked to"
+ " allocate %lu pages from device %d.", request,
+ chain->allocator_index);
+
+ while (gotten < request) {
+ swp_entry_t start, end;
+ if (0) {
+ /* Broken at the moment for SSDs */
+ get_swap_range_of_type(chain->allocator_index, &start, &end,
+ request - gotten + 1);
+ } else {
+ start = end = get_swap_page_of_type(chain->allocator_index);
+ }
+ if (start.val) {
+ int added = end.val - start.val + 1;
+ if (toi_add_to_extent_chain(&chain->allocations,
+ start.val, end.val)) {
+ printk(KERN_INFO "Failed to allocate extent for "
+ "%lu-%lu.\n", start.val, end.val);
+ free_swap_range(start.val, end.val);
+ break;
+ }
+ gotten += added;
+ swap_allocated += added;
+ } else
+ break;
+ }
+
+ toi_message(TOI_IO, TOI_VERBOSE, 0, " Allocated %lu pages.", gotten);
+ return gotten;
+}
+
+static int toi_swap_register_storage(void)
+{
+ int i, result = 0;
+
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "toi_swap_register_storage.");
+ for (i = 0; i < MAX_SWAPFILES; i++) {
+ struct swap_info_struct *si = get_swap_info_struct(i);
+ struct toi_bdev_info *devinfo;
+ unsigned char *p;
+ unsigned char buf[256];
+ struct fs_info *fs_info;
+
+ if (!si || !(si->flags & SWP_WRITEOK) || is_ram_backed(si))
+ continue;
+
+ devinfo = toi_kzalloc(39, sizeof(struct toi_bdev_info),
+ GFP_ATOMIC);
+ if (!devinfo) {
+ printk("Failed to allocate devinfo struct for swap "
+ "device %d.\n", i);
+ return -ENOMEM;
+ }
+
+ devinfo->bdev = si->bdev;
+ devinfo->allocator = &toi_swapops;
+ devinfo->allocator_index = i;
+
+ fs_info = fs_info_from_block_dev(si->bdev);
+ if (fs_info && !IS_ERR(fs_info)) {
+ memcpy(devinfo->uuid, &fs_info->uuid, 16);
+ free_fs_info(fs_info);
+ } else {
+ result = (int) PTR_ERR(fs_info);
+ printk(KERN_INFO "fs_info from block dev returned %d.\n",
+ result);
+ }
+ devinfo->dev_t = si->bdev->bd_dev;
+ devinfo->prio = si->prio;
+ devinfo->bmap_shift = 3;
+ devinfo->blocks_per_page = 1;
+
+ p = d_path(&si->swap_file->f_path, buf, sizeof(buf));
+ sprintf(devinfo->name, "swap on %s", p);
+
+ toi_message(TOI_IO, TOI_VERBOSE, 0, "Registering swap storage:"
+ " Device %d (%lx), prio %d.", i,
+ (unsigned long) devinfo->dev_t, devinfo->prio);
+ toi_bio_ops.register_storage(devinfo);
+ }
+
+ return 0;
+}
+
+static unsigned long toi_swap_free_unused_storage(struct toi_bdev_info *chain, unsigned long used)
+{
+ struct hibernate_extent *extentpointer = NULL;
+ unsigned long extentvalue;
+ unsigned long i = 0, first_freed = 0;
+
+ toi_extent_for_each(&chain->allocations, extentpointer, extentvalue) {
+ i++;
+ if (i > used) {
+ swap_free((swp_entry_t) { extentvalue });
+ if (!first_freed)
+ first_freed = extentvalue;
+ }
+ }
+
+ return first_freed;
+}
+
+/*
+ * toi_swap_memory_needed
+ *
+ * Description:
+ * Returns the number of bytes of RAM needed for this
+ * code to do its work. (Used when calculating whether
+ * we have enough memory to be able to hibernate & resume).
+ *
+ */
+static int toi_swap_memory_needed(void)
+{
+ return 1;
+}
+
+/*
+ * toi_swap_print_debug_stats
+ *
+ * Description: Print swap allocator status into the given buffer.
+ */
+static int toi_swap_print_debug_stats(char *buffer, int size)
+{
+ int len = 0;
+
+ len = scnprintf(buffer, size, "- Swap Allocator enabled.\n");
+ if (swapfilename[0])
+ len += scnprintf(buffer+len, size-len,
+ " Attempting to automatically swapon: %s.\n",
+ swapfilename);
+
+ si_swapinfo_no_compcache();
+
+ len += scnprintf(buffer+len, size-len,
+ " Swap available for image: %lu pages.\n",
+ swapinfo.freeswap + swap_allocated);
+
+ return len;
+}
+
+static int header_locations_read_sysfs(const char *page, int count)
+{
+ int i, printedpartitionsmessage = 0, len = 0, haveswap = 0;
+ struct inode *swapf = NULL;
+ int zone;
+ char *path_page = (char *) toi_get_free_page(10, GFP_KERNEL);
+ char *path, *output = (char *) page;
+ int path_len;
+
+ if (!page)
+ return 0;
+
+ for (i = 0; i < MAX_SWAPFILES; i++) {
+ struct swap_info_struct *si = get_swap_info_struct(i);
+
+ if (!si || !(si->flags & SWP_WRITEOK))
+ continue;
+
+ if (S_ISBLK(si->swap_file->f_mapping->host->i_mode)) {
+ haveswap = 1;
+ if (!printedpartitionsmessage) {
+ len += sprintf(output + len,
+ "For swap partitions, simply use the "
+ "format: resume=swap:/dev/hda1.\n");
+ printedpartitionsmessage = 1;
+ }
+ } else {
+ path_len = 0;
+
+ path = d_path(&si->swap_file->f_path, path_page,
+ PAGE_SIZE);
+ path_len = snprintf(path_page, PAGE_SIZE, "%s", path);
+
+ haveswap = 1;
+ swapf = si->swap_file->f_mapping->host;
+ zone = bmap(swapf, 0);
+ if (!zone) {
+ len += sprintf(output + len,
+ "Swapfile %s has been corrupted. Reuse"
+ " mkswap on it and try again.\n",
+ path_page);
+ } else {
+ char name_buffer[BDEVNAME_SIZE];
+ len += sprintf(output + len,
+ "For swapfile `%s`,"
+ " use resume=swap:/dev/%s:0x%x.\n",
+ path_page,
+ bdevname(si->bdev, name_buffer),
+ zone << (swapf->i_blkbits - 9));
+ }
+ }
+ }
+
+ if (!haveswap)
+ len = sprintf(output, "You need to turn on swap partitions "
+ "before examining this file.\n");
+
+ toi_free_page(10, (unsigned long) path_page);
+ return len;
+}
+
+static struct toi_sysfs_data sysfs_params[] = {
+ SYSFS_STRING("swapfilename", SYSFS_RW, swapfilename, 255, 0, NULL),
+ SYSFS_CUSTOM("headerlocations", SYSFS_READONLY,
+ header_locations_read_sysfs, NULL, 0, NULL),
+ SYSFS_INT("enabled", SYSFS_RW, &toi_swapops.enabled, 0, 1, 0,
+ attempt_to_parse_resume_device2),
+};
+
+static struct toi_bio_allocator_ops toi_bio_swapops = {
+ .register_storage = toi_swap_register_storage,
+ .storage_available = toi_swap_storage_available,
+ .allocate_storage = toi_swap_allocate_storage,
+ .bmap = get_main_pool_phys_params,
+ .free_storage = toi_swap_free_storage,
+ .free_unused_storage = toi_swap_free_unused_storage,
+};
+
+static struct toi_module_ops toi_swapops = {
+ .type = BIO_ALLOCATOR_MODULE,
+ .name = "swap storage",
+ .directory = "swap",
+ .module = THIS_MODULE,
+ .memory_needed = toi_swap_memory_needed,
+ .print_debug_info = toi_swap_print_debug_stats,
+ .initialise = toi_swap_initialise,
+ .cleanup = toi_swap_cleanup,
+ .bio_allocator_ops = &toi_bio_swapops,
+
+ .sysfs_data = sysfs_params,
+ .num_sysfs_entries = sizeof(sysfs_params) /
+ sizeof(struct toi_sysfs_data),
+};
+
+/* ---- Registration ---- */
+static __init int toi_swap_load(void)
+{
+ return toi_register_module(&toi_swapops);
+}
+
+late_initcall(toi_swap_load);
diff --git a/kernel/power/tuxonice_sysfs.c b/kernel/power/tuxonice_sysfs.c
new file mode 100644
index 000000000..79c9315b6
--- /dev/null
+++ b/kernel/power/tuxonice_sysfs.c
@@ -0,0 +1,333 @@
+/*
+ * kernel/power/tuxonice_sysfs.c
+ *
+ * Copyright (C) 2002-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * This file contains support for sysfs entries for tuning TuxOnIce.
+ *
+ * We have a generic handler that deals with the most common cases, and
+ * hooks for special handlers to use.
+ */
+
+#include <linux/suspend.h>
+
+#include "tuxonice_sysfs.h"
+#include "tuxonice.h"
+#include "tuxonice_storage.h"
+#include "tuxonice_alloc.h"
+
+static int toi_sysfs_initialised;
+
+static void toi_initialise_sysfs(void);
+
+static struct toi_sysfs_data sysfs_params[];
+
+#define to_sysfs_data(_attr) container_of(_attr, struct toi_sysfs_data, attr)
+
+static void toi_main_wrapper(void)
+{
+ toi_try_hibernate();
+}
+
+static ssize_t toi_attr_show(struct kobject *kobj, struct attribute *attr,
+ char *page)
+{
+ struct toi_sysfs_data *sysfs_data = to_sysfs_data(attr);
+ int len = 0;
+ int full_prep = sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ;
+
+ if (full_prep && toi_start_anything(0))
+ return -EBUSY;
+
+ if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ)
+ toi_prepare_usm();
+
+ switch (sysfs_data->type) {
+ case TOI_SYSFS_DATA_CUSTOM:
+ len = (sysfs_data->data.special.read_sysfs) ?
+ (sysfs_data->data.special.read_sysfs)(page, PAGE_SIZE)
+ : 0;
+ break;
+ case TOI_SYSFS_DATA_BIT:
+ len = sprintf(page, "%d\n",
+ -test_bit(sysfs_data->data.bit.bit,
+ sysfs_data->data.bit.bit_vector));
+ break;
+ case TOI_SYSFS_DATA_INTEGER:
+ len = sprintf(page, "%d\n",
+ *(sysfs_data->data.integer.variable));
+ break;
+ case TOI_SYSFS_DATA_LONG:
+ len = sprintf(page, "%ld\n",
+ *(sysfs_data->data.a_long.variable));
+ break;
+ case TOI_SYSFS_DATA_UL:
+ len = sprintf(page, "%lu\n",
+ *(sysfs_data->data.ul.variable));
+ break;
+ case TOI_SYSFS_DATA_STRING:
+ len = sprintf(page, "%s\n",
+ sysfs_data->data.string.variable);
+ break;
+ }
+
+ if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ)
+ toi_cleanup_usm();
+
+ if (full_prep)
+ toi_finish_anything(0);
+
+ return len;
+}
+
+#define BOUND(_variable, _type) do { \
+ if (*_variable < sysfs_data->data._type.minimum) \
+ *_variable = sysfs_data->data._type.minimum; \
+ else if (*_variable > sysfs_data->data._type.maximum) \
+ *_variable = sysfs_data->data._type.maximum; \
+} while (0)
+
+static ssize_t toi_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *my_buf, size_t count)
+{
+ int assigned_temp_buffer = 0, result = count;
+ struct toi_sysfs_data *sysfs_data = to_sysfs_data(attr);
+
+ if (toi_start_anything((sysfs_data->flags & SYSFS_HIBERNATE_OR_RESUME)))
+ return -EBUSY;
+
+ ((char *) my_buf)[count] = 0;
+
+ if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_WRITE)
+ toi_prepare_usm();
+
+ switch (sysfs_data->type) {
+ case TOI_SYSFS_DATA_CUSTOM:
+ if (sysfs_data->data.special.write_sysfs)
+ result = (sysfs_data->data.special.write_sysfs)(my_buf,
+ count);
+ break;
+ case TOI_SYSFS_DATA_BIT:
+ {
+ unsigned long value;
+ result = kstrtoul(my_buf, 0, &value);
+ if (result)
+ break;
+ if (value)
+ set_bit(sysfs_data->data.bit.bit,
+ (sysfs_data->data.bit.bit_vector));
+ else
+ clear_bit(sysfs_data->data.bit.bit,
+ (sysfs_data->data.bit.bit_vector));
+ }
+ break;
+ case TOI_SYSFS_DATA_INTEGER:
+ {
+ long temp;
+ result = kstrtol(my_buf, 0, &temp);
+ if (result)
+ break;
+ *(sysfs_data->data.integer.variable) = (int) temp;
+ BOUND(sysfs_data->data.integer.variable, integer);
+ break;
+ }
+ case TOI_SYSFS_DATA_LONG:
+ {
+ long *variable =
+ sysfs_data->data.a_long.variable;
+ result = kstrtol(my_buf, 0, variable);
+ if (result)
+ break;
+ BOUND(variable, a_long);
+ break;
+ }
+ case TOI_SYSFS_DATA_UL:
+ {
+ unsigned long *variable =
+ sysfs_data->data.ul.variable;
+ result = kstrtoul(my_buf, 0, variable);
+ if (result)
+ break;
+ BOUND(variable, ul);
+ break;
+ }
+ case TOI_SYSFS_DATA_STRING:
+ {
+ int copy_len = count;
+ char *variable =
+ sysfs_data->data.string.variable;
+
+ if (sysfs_data->data.string.max_length &&
+ (copy_len > sysfs_data->data.string.max_length))
+ copy_len = sysfs_data->data.string.max_length;
+
+ if (!variable) {
+ variable = (char *) toi_get_zeroed_page(31,
+ TOI_ATOMIC_GFP);
+ sysfs_data->data.string.variable = variable;
+ assigned_temp_buffer = 1;
+ }
+			strncpy(variable, my_buf, copy_len);
+			if (copy_len && my_buf[copy_len - 1] == '\n')
+				variable[copy_len - 1] = 0;
+			variable[copy_len] = 0;
+ }
+ break;
+ }
+
+ if (!result)
+ result = count;
+
+ /* Side effect routine? */
+ if (result == count && sysfs_data->write_side_effect)
+ sysfs_data->write_side_effect();
+
+ /* Free temporary buffers */
+ if (assigned_temp_buffer) {
+ toi_free_page(31,
+ (unsigned long) sysfs_data->data.string.variable);
+ sysfs_data->data.string.variable = NULL;
+ }
+
+ if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_WRITE)
+ toi_cleanup_usm();
+
+ toi_finish_anything(sysfs_data->flags & SYSFS_HIBERNATE_OR_RESUME);
+
+ return result;
+}
+
+static struct sysfs_ops toi_sysfs_ops = {
+ .show = &toi_attr_show,
+ .store = &toi_attr_store,
+};
+
+static struct kobj_type toi_ktype = {
+ .sysfs_ops = &toi_sysfs_ops,
+};
+
+struct kobject *tuxonice_kobj;
+
+/* Non-module sysfs entries.
+ *
+ * This array contains entries that are automatically registered at
+ * boot. Modules and the console code register their own entries separately.
+ */
+
+static struct toi_sysfs_data sysfs_params[] = {
+ SYSFS_CUSTOM("do_hibernate", SYSFS_WRITEONLY, NULL, NULL,
+ SYSFS_HIBERNATING, toi_main_wrapper),
+ SYSFS_CUSTOM("do_resume", SYSFS_WRITEONLY, NULL, NULL,
+ SYSFS_RESUMING, toi_try_resume)
+};
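+
+/*
+ * Usage sketch (illustrative, not part of the original patch): these two
+ * entries have no read or write handlers of their own; writing any value
+ * simply runs the write_side_effect routine, so a hibernation cycle can be
+ * started from userspace with something like
+ *
+ *	int fd = open("/sys/power/tuxonice/do_hibernate", O_WRONLY);
+ *	write(fd, "1", 1);
+ *	close(fd);
+ *
+ * and a resume attempt triggered in the same way via do_resume.
+ */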
+
+void remove_toi_sysdir(struct kobject *kobj)
+{
+ if (!kobj)
+ return;
+
+ kobject_put(kobj);
+}
+
+struct kobject *make_toi_sysdir(char *name)
+{
+ struct kobject *kobj = kobject_create_and_add(name, tuxonice_kobj);
+
+ if (!kobj) {
+ printk(KERN_INFO "TuxOnIce: Can't allocate kobject for sysfs "
+ "dir!\n");
+ return NULL;
+ }
+
+ kobj->ktype = &toi_ktype;
+
+ return kobj;
+}
+
+/* toi_register_sysfs_file
+ *
+ * Helper for registering a new /sys/power/tuxonice entry.
+ */
+
+int toi_register_sysfs_file(
+ struct kobject *kobj,
+ struct toi_sysfs_data *toi_sysfs_data)
+{
+ int result;
+
+ if (!toi_sysfs_initialised)
+ toi_initialise_sysfs();
+
+ result = sysfs_create_file(kobj, &toi_sysfs_data->attr);
+ if (result)
+ printk(KERN_INFO "TuxOnIce: sysfs_create_file for %s "
+ "returned %d.\n",
+ toi_sysfs_data->attr.name, result);
+ kobj->ktype = &toi_ktype;
+
+ return result;
+}
+
+/* toi_unregister_sysfs_file
+ *
+ * Helper for removing unwanted /sys/power/tuxonice entries.
+ *
+ */
+void toi_unregister_sysfs_file(struct kobject *kobj,
+ struct toi_sysfs_data *toi_sysfs_data)
+{
+ sysfs_remove_file(kobj, &toi_sysfs_data->attr);
+}
+
+void toi_cleanup_sysfs(void)
+{
+ int i,
+ numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data);
+
+ if (!toi_sysfs_initialised)
+ return;
+
+ for (i = 0; i < numfiles; i++)
+ toi_unregister_sysfs_file(tuxonice_kobj, &sysfs_params[i]);
+
+ kobject_put(tuxonice_kobj);
+ toi_sysfs_initialised = 0;
+}
+
+/* toi_initialise_sysfs
+ *
+ * Initialise the /sys/power/tuxonice directory.
+ */
+
+static void toi_initialise_sysfs(void)
+{
+ int i;
+ int numfiles = sizeof(sysfs_params) / sizeof(struct toi_sysfs_data);
+
+ if (toi_sysfs_initialised)
+ return;
+
+ /* Make our TuxOnIce directory a child of /sys/power */
+ tuxonice_kobj = kobject_create_and_add("tuxonice", power_kobj);
+ if (!tuxonice_kobj)
+ return;
+
+ toi_sysfs_initialised = 1;
+
+ for (i = 0; i < numfiles; i++)
+ toi_register_sysfs_file(tuxonice_kobj, &sysfs_params[i]);
+}
+
+int toi_sysfs_init(void)
+{
+ toi_initialise_sysfs();
+ return 0;
+}
+
+void toi_sysfs_exit(void)
+{
+ toi_cleanup_sysfs();
+}
diff --git a/kernel/power/tuxonice_sysfs.h b/kernel/power/tuxonice_sysfs.h
new file mode 100644
index 000000000..5b331b19a
--- /dev/null
+++ b/kernel/power/tuxonice_sysfs.h
@@ -0,0 +1,137 @@
+/*
+ * kernel/power/tuxonice_sysfs.h
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/sysfs.h>
+
+struct toi_sysfs_data {
+ struct attribute attr;
+ int type;
+ int flags;
+ union {
+ struct {
+ unsigned long *bit_vector;
+ int bit;
+ } bit;
+ struct {
+ int *variable;
+ int minimum;
+ int maximum;
+ } integer;
+ struct {
+ long *variable;
+ long minimum;
+ long maximum;
+ } a_long;
+ struct {
+ unsigned long *variable;
+ unsigned long minimum;
+ unsigned long maximum;
+ } ul;
+ struct {
+ char *variable;
+ int max_length;
+ } string;
+ struct {
+ int (*read_sysfs) (const char *buffer, int count);
+ int (*write_sysfs) (const char *buffer, int count);
+ void *data;
+ } special;
+ } data;
+
+	/* Side-effect routine. Used, e.g., for reparsing the
+	 * resume= entry when it changes. */
+ void (*write_side_effect) (void);
+ struct list_head sysfs_data_list;
+};
+
+enum {
+ TOI_SYSFS_DATA_NONE = 1,
+ TOI_SYSFS_DATA_CUSTOM,
+ TOI_SYSFS_DATA_BIT,
+ TOI_SYSFS_DATA_INTEGER,
+ TOI_SYSFS_DATA_UL,
+ TOI_SYSFS_DATA_LONG,
+ TOI_SYSFS_DATA_STRING
+};
+
+#define SYSFS_WRITEONLY 0200
+#define SYSFS_READONLY 0444
+#define SYSFS_RW 0644
+
+#define SYSFS_BIT(_name, _mode, _ul, _bit, _flags) { \
+ .attr = {.name = _name , .mode = _mode }, \
+ .type = TOI_SYSFS_DATA_BIT, \
+ .flags = _flags, \
+ .data = { .bit = { .bit_vector = _ul, .bit = _bit } } }
+
+#define SYSFS_INT(_name, _mode, _int, _min, _max, _flags, _wse) { \
+ .attr = {.name = _name , .mode = _mode }, \
+ .type = TOI_SYSFS_DATA_INTEGER, \
+ .flags = _flags, \
+ .data = { .integer = { .variable = _int, .minimum = _min, \
+ .maximum = _max } }, \
+ .write_side_effect = _wse }
+
+#define SYSFS_UL(_name, _mode, _ul, _min, _max, _flags) { \
+ .attr = {.name = _name , .mode = _mode }, \
+ .type = TOI_SYSFS_DATA_UL, \
+ .flags = _flags, \
+ .data = { .ul = { .variable = _ul, .minimum = _min, \
+ .maximum = _max } } }
+
+#define SYSFS_LONG(_name, _mode, _long, _min, _max, _flags) { \
+ .attr = {.name = _name , .mode = _mode }, \
+ .type = TOI_SYSFS_DATA_LONG, \
+ .flags = _flags, \
+ .data = { .a_long = { .variable = _long, .minimum = _min, \
+ .maximum = _max } } }
+
+#define SYSFS_STRING(_name, _mode, _string, _max_len, _flags, _wse) { \
+ .attr = {.name = _name , .mode = _mode }, \
+ .type = TOI_SYSFS_DATA_STRING, \
+ .flags = _flags, \
+ .data = { .string = { .variable = _string, .max_length = _max_len } }, \
+ .write_side_effect = _wse }
+
+#define SYSFS_CUSTOM(_name, _mode, _read, _write, _flags, _wse) { \
+ .attr = {.name = _name , .mode = _mode }, \
+ .type = TOI_SYSFS_DATA_CUSTOM, \
+ .flags = _flags, \
+ .data = { .special = { .read_sysfs = _read, .write_sysfs = _write } }, \
+ .write_side_effect = _wse }
+
+#define SYSFS_NONE(_name, _wse) { \
+ .attr = {.name = _name , .mode = SYSFS_WRITEONLY }, \
+ .type = TOI_SYSFS_DATA_NONE, \
+ .write_side_effect = _wse, \
+}
+
+/* Flags */
+#define SYSFS_NEEDS_SM_FOR_READ 1
+#define SYSFS_NEEDS_SM_FOR_WRITE 2
+#define SYSFS_HIBERNATE 4
+#define SYSFS_RESUME 8
+#define SYSFS_HIBERNATE_OR_RESUME (SYSFS_HIBERNATE | SYSFS_RESUME)
+#define SYSFS_HIBERNATING (SYSFS_HIBERNATE | SYSFS_NEEDS_SM_FOR_WRITE)
+#define SYSFS_RESUMING (SYSFS_RESUME | SYSFS_NEEDS_SM_FOR_WRITE)
+#define SYSFS_NEEDS_SM_FOR_BOTH \
+ (SYSFS_NEEDS_SM_FOR_READ | SYSFS_NEEDS_SM_FOR_WRITE)
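+
+/*
+ * Declaration sketch (illustrative only): a caller typically builds a static
+ * array of these descriptors and lets the generic show/store handler do the
+ * parsing and bounds checking, e.g.
+ *
+ *	static int my_enabled = 1;
+ *	static unsigned long my_flags;
+ *
+ *	static struct toi_sysfs_data my_params[] = {
+ *		SYSFS_INT("enabled", SYSFS_RW, &my_enabled, 0, 1, 0, NULL),
+ *		SYSFS_BIT("verbose", SYSFS_RW, &my_flags, 0, 0),
+ *	};
+ *
+ * Entries are registered either with toi_register_sysfs_file() or, for
+ * modules, via the sysfs_data/num_sysfs_entries fields of toi_module_ops.
+ */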
+
+int toi_register_sysfs_file(struct kobject *kobj,
+ struct toi_sysfs_data *toi_sysfs_data);
+void toi_unregister_sysfs_file(struct kobject *kobj,
+ struct toi_sysfs_data *toi_sysfs_data);
+
+extern struct kobject *tuxonice_kobj;
+
+struct kobject *make_toi_sysdir(char *name);
+void remove_toi_sysdir(struct kobject *obj);
+extern void toi_cleanup_sysfs(void);
+
+extern int toi_sysfs_init(void);
+extern void toi_sysfs_exit(void);
diff --git a/kernel/power/tuxonice_ui.c b/kernel/power/tuxonice_ui.c
new file mode 100644
index 000000000..c405f9b9a
--- /dev/null
+++ b/kernel/power/tuxonice_ui.c
@@ -0,0 +1,247 @@
+/*
+ * kernel/power/tuxonice_ui.c
+ *
+ * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
+ * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr>
+ * Copyright (C) 2002-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Routines for TuxOnIce's user interface.
+ *
+ * The user interface code talks to a userspace program via a
+ * netlink socket.
+ *
+ * The kernel side:
+ * - starts the userui program;
+ * - sends text messages and progress bar status;
+ *
+ * The user space side:
+ * - passes messages regarding user requests (abort, toggle reboot etc)
+ *
+ */
+
+#define __KERNEL_SYSCALLS__
+
+#include <linux/reboot.h>
+
+#include "tuxonice_sysfs.h"
+#include "tuxonice_modules.h"
+#include "tuxonice.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_netlink.h"
+#include "tuxonice_power_off.h"
+#include "tuxonice_builtin.h"
+
+static char local_printf_buf[1024]; /* Same as printk - should be safe */
+struct ui_ops *toi_current_ui;
+
+/**
+ * toi_wait_for_keypress - Wait for keypress via userui or /dev/console.
+ *
+ * @timeout: Maximum time to wait.
+ *
+ * Wait for a keypress, either from userui or /dev/console if userui isn't
+ * available. The non-userui path exists mainly for boot time, before userui
+ * has been started, when we have an important warning to give the user.
+ */
+static char toi_wait_for_keypress(int timeout)
+{
+ if (toi_current_ui && toi_current_ui->wait_for_key(timeout))
+ return ' ';
+
+ return toi_wait_for_keypress_dev_console(timeout);
+}
+
+/* toi_early_boot_message()
+ * Description: Handle errors early in the process of booting.
+ * The user may press C to continue booting, perhaps
+ * invalidating the image, or space to reboot.
+ * This works from either the serial console or normally
+ * attached keyboard.
+ *
+ * Note that we come in here from init, while the kernel is
+ * locked. If we want to get events from the serial console,
+ * we need to temporarily unlock the kernel.
+ *
+ * toi_early_boot_message may also be called post-boot.
+ * In this case, it simply printks the message and returns.
+ *
+ * Arguments:	int	message_detail. Selects which warning text is shown
+ *			(essentially, whether we are able to erase the image).
+ *		int	default_answer. What to do when we time out. This
+ *			will normally be to continue, but the user might
+ *			provide command line options (__setup) to override
+ *			particular cases.
+ *		char *	warning_reason. Format string explaining why we're
+ *			complaining, or NULL.
+ */
+
+#define say(message, a...) printk(KERN_EMERG message, ##a)
+
+void toi_early_boot_message(int message_detail, int default_answer,
+ char *warning_reason, ...)
+{
+	unsigned long orig_state = get_toi_state(), continue_req = 0;
+#if defined(CONFIG_VT) || defined(CONFIG_SERIAL_CONSOLE)
+	unsigned long orig_loglevel = console_loglevel;
+	int can_ask = 1;
+#else
+	int can_ask = 0;
+#endif
+
+ va_list args;
+ int printed_len;
+
+ if (!toi_wait) {
+ set_toi_state(TOI_CONTINUE_REQ);
+ can_ask = 0;
+ }
+
+ if (warning_reason) {
+ va_start(args, warning_reason);
+ printed_len = vsnprintf(local_printf_buf,
+ sizeof(local_printf_buf),
+ warning_reason,
+ args);
+ va_end(args);
+ }
+
+ if (!test_toi_state(TOI_BOOT_TIME)) {
+ printk("TuxOnIce: %s\n", local_printf_buf);
+ return;
+ }
+
+ if (!can_ask) {
+ continue_req = !!default_answer;
+ goto post_ask;
+ }
+
+#if defined(CONFIG_VT) || defined(CONFIG_SERIAL_CONSOLE)
+ console_loglevel = 7;
+
+ say("=== TuxOnIce ===\n\n");
+ if (warning_reason) {
+ say("BIG FAT WARNING!! %s\n\n", local_printf_buf);
+ switch (message_detail) {
+ case 0:
+			say("If you continue booting, note that any image WILL "
+ "NOT BE REMOVED.\nTuxOnIce is unable to do so "
+ "because the appropriate modules aren't\n"
+ "loaded. You should manually remove the image "
+ "to avoid any\npossibility of corrupting your "
+ "filesystem(s) later.\n");
+ break;
+ case 1:
+ say("If you want to use the current TuxOnIce image, "
+ "reboot and try\nagain with the same kernel "
+ "that you hibernated from. If you want\n"
+ "to forget that image, continue and the image "
+ "will be erased.\n");
+ break;
+ }
+ say("Press SPACE to reboot or C to continue booting with "
+ "this kernel\n\n");
+ if (toi_wait > 0)
+ say("Default action if you don't select one in %d "
+ "seconds is: %s.\n",
+ toi_wait,
+ default_answer == TOI_CONTINUE_REQ ?
+ "continue booting" : "reboot");
+ } else {
+ say("BIG FAT WARNING!!\n\n"
+ "You have tried to resume from this image before.\n"
+ "If it failed once, it may well fail again.\n"
+ "Would you like to remove the image and boot "
+ "normally?\nThis will be equivalent to entering "
+ "noresume on the\nkernel command line.\n\n"
+ "Press SPACE to remove the image or C to continue "
+ "resuming.\n\n");
+ if (toi_wait > 0)
+ say("Default action if you don't select one in %d "
+ "seconds is: %s.\n", toi_wait,
+ !!default_answer ?
+ "continue resuming" : "remove the image");
+ }
+ console_loglevel = orig_loglevel;
+
+ set_toi_state(TOI_SANITY_CHECK_PROMPT);
+ clear_toi_state(TOI_CONTINUE_REQ);
+
+ if (toi_wait_for_keypress(toi_wait) == 0) /* We timed out */
+ continue_req = !!default_answer;
+ else
+ continue_req = test_toi_state(TOI_CONTINUE_REQ);
+
+#endif /* CONFIG_VT or CONFIG_SERIAL_CONSOLE */
+
+post_ask:
+ if ((warning_reason) && (!continue_req))
+ kernel_restart(NULL);
+
+ restore_toi_state(orig_state);
+ if (continue_req)
+ set_toi_state(TOI_CONTINUE_REQ);
+}
+
+#undef say
+
+/*
+ * User interface specific /sys/power/tuxonice entries.
+ */
+
+static struct toi_sysfs_data sysfs_params[] = {
+#if defined(CONFIG_NET) && defined(CONFIG_SYSFS)
+ SYSFS_INT("default_console_level", SYSFS_RW,
+ &toi_bkd.toi_default_console_level, 0, 7, 0, NULL),
+ SYSFS_UL("debug_sections", SYSFS_RW, &toi_bkd.toi_debug_state, 0,
+ 1 << 30, 0),
+ SYSFS_BIT("log_everything", SYSFS_RW, &toi_bkd.toi_action, TOI_LOGALL,
+ 0)
+#endif
+};
+
+static struct toi_module_ops userui_ops = {
+ .type = MISC_HIDDEN_MODULE,
+ .name = "printk ui",
+ .directory = "user_interface",
+ .module = THIS_MODULE,
+ .sysfs_data = sysfs_params,
+ .num_sysfs_entries = sizeof(sysfs_params) /
+ sizeof(struct toi_sysfs_data),
+};
+
+int toi_register_ui_ops(struct ui_ops *this_ui)
+{
+ if (toi_current_ui) {
+ printk(KERN_INFO "Only one TuxOnIce user interface module can "
+ "be loaded at a time.");
+ return -EBUSY;
+ }
+
+ toi_current_ui = this_ui;
+
+ return 0;
+}
+
+void toi_remove_ui_ops(struct ui_ops *this_ui)
+{
+ if (toi_current_ui != this_ui)
+ return;
+
+ toi_current_ui = NULL;
+}
+
+/* toi_ui_init
+ * Description: Boot time initialisation for user interface.
+ */
+
+int toi_ui_init(void)
+{
+ return toi_register_module(&userui_ops);
+}
+
+void toi_ui_exit(void)
+{
+ toi_unregister_module(&userui_ops);
+}
diff --git a/kernel/power/tuxonice_ui.h b/kernel/power/tuxonice_ui.h
new file mode 100644
index 000000000..d71c607f6
--- /dev/null
+++ b/kernel/power/tuxonice_ui.h
@@ -0,0 +1,97 @@
+/*
+ * kernel/power/tuxonice_ui.h
+ *
+ * Copyright (C) 2004-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ */
+
+enum {
+ DONT_CLEAR_BAR,
+ CLEAR_BAR
+};
+
+enum {
+ /* Userspace -> Kernel */
+ USERUI_MSG_ABORT = 0x11,
+ USERUI_MSG_SET_STATE = 0x12,
+ USERUI_MSG_GET_STATE = 0x13,
+ USERUI_MSG_GET_DEBUG_STATE = 0x14,
+ USERUI_MSG_SET_DEBUG_STATE = 0x15,
+ USERUI_MSG_SPACE = 0x18,
+ USERUI_MSG_GET_POWERDOWN_METHOD = 0x1A,
+ USERUI_MSG_SET_POWERDOWN_METHOD = 0x1B,
+ USERUI_MSG_GET_LOGLEVEL = 0x1C,
+ USERUI_MSG_SET_LOGLEVEL = 0x1D,
+ USERUI_MSG_PRINTK = 0x1E,
+
+ /* Kernel -> Userspace */
+ USERUI_MSG_MESSAGE = 0x21,
+ USERUI_MSG_PROGRESS = 0x22,
+ USERUI_MSG_POST_ATOMIC_RESTORE = 0x25,
+
+ USERUI_MSG_MAX,
+};
+
+struct userui_msg_params {
+ u32 a, b, c, d;
+ char text[255];
+};
+
+struct ui_ops {
+ char (*wait_for_key) (int timeout);
+ u32 (*update_status) (u32 value, u32 maximum, const char *fmt, ...);
+ void (*prepare_status) (int clearbar, const char *fmt, ...);
+ void (*cond_pause) (int pause, char *message);
+ void (*abort)(int result_code, const char *fmt, ...);
+ void (*prepare)(void);
+ void (*cleanup)(void);
+ void (*message)(u32 section, u32 level, u32 normally_logged,
+ const char *fmt, ...);
+};
+
+extern struct ui_ops *toi_current_ui;
+
+#define toi_update_status(val, max, fmt, args...) \
+ (toi_current_ui ? (toi_current_ui->update_status) (val, max, fmt, ##args) : \
+ max)
+
+#define toi_prepare_console() \
+ do { if (toi_current_ui) \
+ (toi_current_ui->prepare)(); \
+ } while (0)
+
+#define toi_cleanup_console() \
+ do { if (toi_current_ui) \
+ (toi_current_ui->cleanup)(); \
+ } while (0)
+
+#define abort_hibernate(result, fmt, args...) \
+ do { if (toi_current_ui) \
+ (toi_current_ui->abort)(result, fmt, ##args); \
+ else { \
+ set_abort_result(result); \
+ } \
+ } while (0)
+
+#define toi_cond_pause(pause, message) \
+ do { if (toi_current_ui) \
+ (toi_current_ui->cond_pause)(pause, message); \
+ } while (0)
+
+#define toi_prepare_status(clear, fmt, args...) \
+ do { if (toi_current_ui) \
+ (toi_current_ui->prepare_status)(clear, fmt, ##args); \
+ else \
+ printk(KERN_INFO fmt "%s", ##args, "\n"); \
+ } while (0)
+
+#define toi_message(sn, lev, log, fmt, a...) \
+do { \
+ if (toi_current_ui && (!sn || test_debug_state(sn))) \
+ toi_current_ui->message(sn, lev, log, fmt, ##a); \
+} while (0)
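+
+/*
+ * Usage sketch (illustrative only; write_one_page() stands in for the real
+ * work): a typical I/O loop drives the display through these wrappers, which
+ * fall back to printk, or to doing nothing, when no userui is registered:
+ *
+ *	toi_prepare_status(CLEAR_BAR, "Writing caches");
+ *	for (i = 0; i < total; i++) {
+ *		write_one_page(i);
+ *		if (i >= next)
+ *			next = toi_update_status(i, total, NULL);
+ *	}
+ */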
+
+__exit void toi_ui_cleanup(void);
+extern int toi_ui_init(void);
+extern void toi_ui_exit(void);
+extern int toi_register_ui_ops(struct ui_ops *this_ui);
+extern void toi_remove_ui_ops(struct ui_ops *this_ui);
diff --git a/kernel/power/tuxonice_userui.c b/kernel/power/tuxonice_userui.c
new file mode 100644
index 000000000..edc885c72
--- /dev/null
+++ b/kernel/power/tuxonice_userui.c
@@ -0,0 +1,658 @@
+/*
+ * kernel/power/tuxonice_userui.c
+ *
+ * Copyright (C) 2005-2007 Bernard Blackham
+ * Copyright (C) 2002-2015 Nigel Cunningham (nigel at nigelcunningham com au)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Routines for TuxOnIce's user interface.
+ *
+ * The user interface code talks to a userspace program via a
+ * netlink socket.
+ *
+ * The kernel side:
+ * - starts the userui program;
+ * - sends text messages and progress bar status;
+ *
+ * The user space side:
+ * - passes messages regarding user requests (abort, toggle reboot etc)
+ *
+ */
+
+#define __KERNEL_SYSCALLS__
+
+#include <linux/suspend.h>
+#include <linux/freezer.h>
+#include <linux/console.h>
+#include <linux/ctype.h>
+#include <linux/tty.h>
+#include <linux/vt_kern.h>
+#include <linux/reboot.h>
+#include <linux/security.h>
+#include <linux/syscalls.h>
+#include <linux/vt.h>
+
+#include "tuxonice_sysfs.h"
+#include "tuxonice_modules.h"
+#include "tuxonice.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_netlink.h"
+#include "tuxonice_power_off.h"
+
+static char local_printf_buf[1024]; /* Same as printk - should be safe */
+
+static struct user_helper_data ui_helper_data;
+static struct toi_module_ops userui_ops;
+static int orig_kmsg;
+
+static char lastheader[512];
+static int lastheader_message_len;
+static int ui_helper_changed; /* Used at resume time so we don't overwrite a
+				 value set from an initrd/ramfs. */
+
+/* Number of distinct progress amounts that userspace can display */
+static int progress_granularity = 30;
+
+static DECLARE_WAIT_QUEUE_HEAD(userui_wait_for_key);
+static int userui_wait_should_wake;
+
+#define toi_stop_waiting_for_userui_key() \
+{ \
+ userui_wait_should_wake = true; \
+ wake_up_interruptible(&userui_wait_for_key); \
+}
+
+/**
+ * ui_nl_set_state - Update toi_action based on a message from userui.
+ *
+ * @n: The bit (1 << bit) to set.
+ */
+static void ui_nl_set_state(int n)
+{
+ /* Only let them change certain settings */
+ static const u32 toi_action_mask =
+ (1 << TOI_REBOOT) | (1 << TOI_PAUSE) |
+ (1 << TOI_LOGALL) |
+ (1 << TOI_SINGLESTEP) |
+ (1 << TOI_PAUSE_NEAR_PAGESET_END);
+ static unsigned long new_action;
+
+ new_action = (toi_bkd.toi_action & (~toi_action_mask)) |
+ (n & toi_action_mask);
+
+ printk(KERN_DEBUG "n is %x. Action flags being changed from %lx "
+ "to %lx.", n, toi_bkd.toi_action, new_action);
+ toi_bkd.toi_action = new_action;
+
+ if (!test_action_state(TOI_PAUSE) &&
+ !test_action_state(TOI_SINGLESTEP))
+ toi_stop_waiting_for_userui_key();
+}
+
+/**
+ * userui_post_atomic_restore - Tell userui that atomic restore just happened.
+ *
+ * Tell userui that atomic restore just occurred, so that it can do things like
+ * redrawing the screen, re-getting settings and so on.
+ */
+static void userui_post_atomic_restore(struct toi_boot_kernel_data *bkd)
+{
+ toi_send_netlink_message(&ui_helper_data,
+ USERUI_MSG_POST_ATOMIC_RESTORE, NULL, 0);
+}
+
+/**
+ * userui_storage_needed - Report how much memory in image header is needed.
+ */
+static int userui_storage_needed(void)
+{
+ return sizeof(ui_helper_data.program) + 1 + sizeof(int);
+}
+
+/**
+ * userui_save_config_info - Fill buffer with config info for image header.
+ *
+ * @buf: Buffer into which to put the config info we want to save.
+ */
+static int userui_save_config_info(char *buf)
+{
+ *((int *) buf) = progress_granularity;
+ memcpy(buf + sizeof(int), ui_helper_data.program,
+ sizeof(ui_helper_data.program));
+ return sizeof(ui_helper_data.program) + sizeof(int) + 1;
+}
+
+/**
+ * userui_load_config_info - Restore config info from buffer.
+ *
+ * @buf: Buffer containing header info loaded.
+ * @size: Size of data loaded for this module.
+ */
+static void userui_load_config_info(char *buf, int size)
+{
+ progress_granularity = *((int *) buf);
+ size -= sizeof(int);
+
+ /* Don't load the saved path if one has already been set */
+ if (ui_helper_changed)
+ return;
+
+ if (size > sizeof(ui_helper_data.program))
+ size = sizeof(ui_helper_data.program);
+
+ memcpy(ui_helper_data.program, buf + sizeof(int), size);
+ ui_helper_data.program[sizeof(ui_helper_data.program)-1] = '\0';
+}
+
+/**
+ * set_ui_program_set: Record that userui program was changed.
+ *
+ * Side effect routine for when the userui program is set. In an initrd or
+ * ramfs, the user may set a location for the userui program. If this happens,
+ * we don't want to reload the value that was saved in the image header. This
+ * routine allows us to flag that we shouldn't restore the program name from
+ * the image header.
+ */
+static void set_ui_program_set(void)
+{
+ ui_helper_changed = 1;
+}
+
+/**
+ * userui_memory_needed - Tell core how much memory to reserve for us.
+ */
+static int userui_memory_needed(void)
+{
+ /* ball park figure of 128 pages */
+ return 128 * PAGE_SIZE;
+}
+
+/**
+ * userui_update_status - Update the progress bar and (if on) in-bar message.
+ *
+ * @value: Current progress percentage numerator.
+ * @maximum: Current progress percentage denominator.
+ * @fmt: Message to be displayed in the middle of the progress bar.
+ *
+ * Note that a NULL message does not mean that any previous message is erased!
+ * For that, you need toi_prepare_status with clearbar on.
+ *
+ * Returns an unsigned long, being the next numerator (as determined by the
+ * maximum and progress granularity) where status needs to be updated.
+ * This is to reduce unnecessary calls to update_status.
+ */
+static u32 userui_update_status(u32 value, u32 maximum, const char *fmt, ...)
+{
+ static u32 last_step = 9999;
+ struct userui_msg_params msg;
+ u32 this_step, next_update;
+ int bitshift;
+
+ if (ui_helper_data.pid == -1)
+ return 0;
+
+ if ((!maximum) || (!progress_granularity))
+ return maximum;
+
+ if (value < 0)
+ value = 0;
+
+ if (value > maximum)
+ value = maximum;
+
+ /* Try to avoid math problems - we can't do 64 bit math here
+ * (and shouldn't need it - anyone got screen resolution
+ * of 65536 pixels or more?) */
+ bitshift = fls(maximum) - 16;
+ if (bitshift > 0) {
+ u32 temp_maximum = maximum >> bitshift;
+ u32 temp_value = value >> bitshift;
+ this_step = (u32)
+ (temp_value * progress_granularity / temp_maximum);
+ next_update = (((this_step + 1) * temp_maximum /
+ progress_granularity) + 1) << bitshift;
+ } else {
+ this_step = (u32) (value * progress_granularity / maximum);
+ next_update = ((this_step + 1) * maximum /
+ progress_granularity) + 1;
+ }
+
+ if (this_step == last_step)
+ return next_update;
+
+ memset(&msg, 0, sizeof(msg));
+
+ msg.a = this_step;
+ msg.b = progress_granularity;
+
+ if (fmt) {
+ va_list args;
+ va_start(args, fmt);
+ vsnprintf(msg.text, sizeof(msg.text), fmt, args);
+ va_end(args);
+ msg.text[sizeof(msg.text)-1] = '\0';
+ }
+
+ toi_send_netlink_message(&ui_helper_data, USERUI_MSG_PROGRESS,
+ &msg, sizeof(msg));
+ last_step = this_step;
+
+ return next_update;
+}
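+
+/*
+ * Worked example (illustrative): with progress_granularity == 30,
+ * value == 1000 and maximum == 3000, fls(3000) - 16 is negative, so the
+ * unshifted path runs: this_step = 1000 * 30 / 3000 = 10 and the caller is
+ * told not to report again until value reaches (11 * 3000 / 30) + 1 = 1101.
+ * For maximum values of 2^16 and above, the same arithmetic is done on
+ * right-shifted copies so the 32-bit multiplications cannot overflow.
+ */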
+
+/**
+ * userui_message - Display a message without necessarily logging it.
+ *
+ * @section: Type of message. Messages can be filtered by type.
+ * @level: Degree of importance of the message. Lower values = higher priority.
+ * @normally_logged: Whether logged even if log_everything is off.
+ * @fmt: Message (and parameters).
+ *
+ * This function is intended to do the same job as printk, but without normally
+ * logging what is printed. The point is to be able to get debugging info on
+ * screen without filling the logs with "1/534. ^M 2/534^M. 3/534^M"
+ *
+ * It may be called from an interrupt context - can't sleep!
+ */
+static void userui_message(u32 section, u32 level, u32 normally_logged,
+ const char *fmt, ...)
+{
+ struct userui_msg_params msg;
+
+ if ((level) && (level > console_loglevel))
+ return;
+
+ memset(&msg, 0, sizeof(msg));
+
+ msg.a = section;
+ msg.b = level;
+ msg.c = normally_logged;
+
+ if (fmt) {
+ va_list args;
+ va_start(args, fmt);
+ vsnprintf(msg.text, sizeof(msg.text), fmt, args);
+ va_end(args);
+ msg.text[sizeof(msg.text)-1] = '\0';
+ }
+
+ if (test_action_state(TOI_LOGALL))
+ printk(KERN_INFO "%s\n", msg.text);
+
+ toi_send_netlink_message(&ui_helper_data, USERUI_MSG_MESSAGE,
+ &msg, sizeof(msg));
+}
+
+/**
+ * wait_for_key_via_userui - Wait for userui to receive a keypress.
+ */
+static void wait_for_key_via_userui(void)
+{
+ DECLARE_WAITQUEUE(wait, current);
+
+ add_wait_queue(&userui_wait_for_key, &wait);
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ wait_event_interruptible(userui_wait_for_key, userui_wait_should_wake);
+ userui_wait_should_wake = false;
+
+ set_current_state(TASK_RUNNING);
+ remove_wait_queue(&userui_wait_for_key, &wait);
+}
+
+/**
+ * userui_prepare_status - Display high level messages.
+ *
+ * @clearbar: Whether to clear the progress bar.
+ * @fmt...: New message for the title.
+ *
+ * Prepare the 'nice display', drawing the header and version, along with the
+ * current action and perhaps also resetting the progress bar.
+ */
+static void userui_prepare_status(int clearbar, const char *fmt, ...)
+{
+ va_list args;
+
+ if (fmt) {
+ va_start(args, fmt);
+ lastheader_message_len = vsnprintf(lastheader, 512, fmt, args);
+ va_end(args);
+ }
+
+ if (clearbar)
+ toi_update_status(0, 1, NULL);
+
+ if (ui_helper_data.pid == -1)
+ printk(KERN_EMERG "%s\n", lastheader);
+ else
+ toi_message(0, TOI_STATUS, 1, lastheader, NULL);
+}
+
+/**
+ * toi_wait_for_keypress - Wait for keypress via userui.
+ *
+ * @timeout: Maximum time to wait.
+ *
+ * Wait for a keypress from userui.
+ *
+ * FIXME: Implement timeout?
+ */
+static char userui_wait_for_keypress(int timeout)
+{
+ char key = '\0';
+
+ if (ui_helper_data.pid != -1) {
+ wait_for_key_via_userui();
+ key = ' ';
+ }
+
+ return key;
+}
+
+/**
+ * userui_abort_hibernate - Abort a cycle & tell user if they didn't request it.
+ *
+ * @result_code: Reason why we're aborting (1 << bit).
+ * @fmt: Message to display if telling the user what's going on.
+ *
+ * Abort a cycle. If this wasn't at the user's request (and we're displaying
+ * output), tell the user why and wait for them to acknowledge the message.
+ */
+static void userui_abort_hibernate(int result_code, const char *fmt, ...)
+{
+ va_list args;
+ int printed_len = 0;
+
+ set_result_state(result_code);
+
+ if (test_result_state(TOI_ABORTED))
+ return;
+
+ set_result_state(TOI_ABORTED);
+
+ if (test_result_state(TOI_ABORT_REQUESTED))
+ return;
+
+ va_start(args, fmt);
+ printed_len = vsnprintf(local_printf_buf, sizeof(local_printf_buf),
+ fmt, args);
+ va_end(args);
+ if (ui_helper_data.pid != -1)
+ printed_len = sprintf(local_printf_buf + printed_len,
+ " (Press SPACE to continue)");
+
+ toi_prepare_status(CLEAR_BAR, "%s", local_printf_buf);
+
+ if (ui_helper_data.pid != -1)
+ userui_wait_for_keypress(0);
+}
+
+/**
+ * request_abort_hibernate - Abort hibernating or resuming at user request.
+ *
+ * Handle the user requesting the cancellation of a hibernation or resume by
+ * pressing escape.
+ */
+static void request_abort_hibernate(void)
+{
+ if (test_result_state(TOI_ABORT_REQUESTED) ||
+ !test_action_state(TOI_CAN_CANCEL))
+ return;
+
+ if (test_toi_state(TOI_NOW_RESUMING)) {
+ toi_prepare_status(CLEAR_BAR, "Escape pressed. "
+ "Powering down again.");
+ set_toi_state(TOI_STOP_RESUME);
+ while (!test_toi_state(TOI_IO_STOPPED))
+ schedule();
+ if (toiActiveAllocator->mark_resume_attempted)
+ toiActiveAllocator->mark_resume_attempted(0);
+ toi_power_down();
+ }
+
+ toi_prepare_status(CLEAR_BAR, "--- ESCAPE PRESSED :"
+ " ABORTING HIBERNATION ---");
+ set_abort_result(TOI_ABORT_REQUESTED);
+ toi_stop_waiting_for_userui_key();
+}
+
+/**
+ * userui_user_rcv_msg - Receive a netlink message from userui.
+ *
+ * @skb: skb received.
+ * @nlh: Netlink header received.
+ */
+static int userui_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+ int type;
+ int *data;
+
+ type = nlh->nlmsg_type;
+
+ /* A control message: ignore them */
+ if (type < NETLINK_MSG_BASE)
+ return 0;
+
+ /* Unknown message: reply with EINVAL */
+ if (type >= USERUI_MSG_MAX)
+ return -EINVAL;
+
+ /* All operations require privileges, even GET */
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ /* Only allow one task to receive NOFREEZE privileges */
+ if (type == NETLINK_MSG_NOFREEZE_ME && ui_helper_data.pid != -1) {
+ printk(KERN_INFO "Got NOFREEZE_ME request when "
+ "ui_helper_data.pid is %d.\n", ui_helper_data.pid);
+ return -EBUSY;
+ }
+
+ data = (int *) NLMSG_DATA(nlh);
+
+ switch (type) {
+ case USERUI_MSG_ABORT:
+ request_abort_hibernate();
+ return 0;
+ case USERUI_MSG_GET_STATE:
+ toi_send_netlink_message(&ui_helper_data,
+ USERUI_MSG_GET_STATE, &toi_bkd.toi_action,
+ sizeof(toi_bkd.toi_action));
+ return 0;
+ case USERUI_MSG_GET_DEBUG_STATE:
+ toi_send_netlink_message(&ui_helper_data,
+ USERUI_MSG_GET_DEBUG_STATE,
+ &toi_bkd.toi_debug_state,
+ sizeof(toi_bkd.toi_debug_state));
+ return 0;
+ case USERUI_MSG_SET_STATE:
+ if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int)))
+ return -EINVAL;
+ ui_nl_set_state(*data);
+ return 0;
+ case USERUI_MSG_SET_DEBUG_STATE:
+ if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int)))
+ return -EINVAL;
+ toi_bkd.toi_debug_state = (*data);
+ return 0;
+ case USERUI_MSG_SPACE:
+ toi_stop_waiting_for_userui_key();
+ return 0;
+ case USERUI_MSG_GET_POWERDOWN_METHOD:
+ toi_send_netlink_message(&ui_helper_data,
+ USERUI_MSG_GET_POWERDOWN_METHOD,
+ &toi_poweroff_method,
+ sizeof(toi_poweroff_method));
+ return 0;
+ case USERUI_MSG_SET_POWERDOWN_METHOD:
+ if (nlh->nlmsg_len != NLMSG_LENGTH(sizeof(char)))
+ return -EINVAL;
+ toi_poweroff_method = (unsigned long)(*data);
+ return 0;
+ case USERUI_MSG_GET_LOGLEVEL:
+ toi_send_netlink_message(&ui_helper_data,
+ USERUI_MSG_GET_LOGLEVEL,
+ &toi_bkd.toi_default_console_level,
+ sizeof(toi_bkd.toi_default_console_level));
+ return 0;
+ case USERUI_MSG_SET_LOGLEVEL:
+ if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int)))
+ return -EINVAL;
+ toi_bkd.toi_default_console_level = (*data);
+ return 0;
+ case USERUI_MSG_PRINTK:
+ printk(KERN_INFO "%s", (char *) data);
+ return 0;
+ }
+
+ /* Unhandled here */
+ return 1;
+}
+
+/**
+ * userui_cond_pause - Possibly pause at user request.
+ *
+ * @pause: Whether to pause or just display the message.
+ * @message: Message to display at the start of pausing.
+ *
+ * Potentially pause and wait for the user to tell us to continue. We normally
+ * only pause when @pause is set. While paused, the user can do things like
+ * changing the loglevel, toggling the display of debugging sections and such
+ * like.
+ */
+static void userui_cond_pause(int pause, char *message)
+{
+ int displayed_message = 0, last_key = 0;
+
+ while (last_key != 32 &&
+ ui_helper_data.pid != -1 &&
+ ((test_action_state(TOI_PAUSE) && pause) ||
+ (test_action_state(TOI_SINGLESTEP)))) {
+ if (!displayed_message) {
+ toi_prepare_status(DONT_CLEAR_BAR,
+ "%s Press SPACE to continue.%s",
+ message ? message : "",
+ (test_action_state(TOI_SINGLESTEP)) ?
+ " Single step on." : "");
+ displayed_message = 1;
+ }
+ last_key = userui_wait_for_keypress(0);
+ }
+ schedule();
+}
+
+/**
+ * userui_prepare_console - Prepare the console for use.
+ *
+ * Prepare a console for use, saving current kmsg settings and attempting to
+ * start userui. Console loglevel changes are handled by userui.
+ */
+static void userui_prepare_console(void)
+{
+ orig_kmsg = vt_kmsg_redirect(fg_console + 1);
+
+ ui_helper_data.pid = -1;
+
+ if (!userui_ops.enabled) {
+ printk(KERN_INFO "TuxOnIce: Userui disabled.\n");
+ return;
+ }
+
+ if (*ui_helper_data.program)
+ toi_netlink_setup(&ui_helper_data);
+ else
+ printk(KERN_INFO "TuxOnIce: Userui program not configured.\n");
+}
+
+/**
+ * userui_cleanup_console - Cleanup after a cycle.
+ *
+ * Tell userui to cleanup, and restore kmsg_redirect to its original value.
+ */
+
+static void userui_cleanup_console(void)
+{
+ if (ui_helper_data.pid > -1)
+ toi_netlink_close(&ui_helper_data);
+
+ vt_kmsg_redirect(orig_kmsg);
+}
+
+/*
+ * User interface specific /sys/power/tuxonice entries.
+ */
+
+static struct toi_sysfs_data sysfs_params[] = {
+#if defined(CONFIG_NET) && defined(CONFIG_SYSFS)
+ SYSFS_BIT("enable_escape", SYSFS_RW, &toi_bkd.toi_action,
+ TOI_CAN_CANCEL, 0),
+ SYSFS_BIT("pause_between_steps", SYSFS_RW, &toi_bkd.toi_action,
+ TOI_PAUSE, 0),
+ SYSFS_INT("enabled", SYSFS_RW, &userui_ops.enabled, 0, 1, 0, NULL),
+ SYSFS_INT("progress_granularity", SYSFS_RW, &progress_granularity, 1,
+ 2048, 0, NULL),
+ SYSFS_STRING("program", SYSFS_RW, ui_helper_data.program, 255, 0,
+ set_ui_program_set),
+ SYSFS_INT("debug", SYSFS_RW, &ui_helper_data.debug, 0, 1, 0, NULL)
+#endif
+};
+
+static struct toi_module_ops userui_ops = {
+ .type = MISC_MODULE,
+ .name = "userui",
+ .shared_directory = "user_interface",
+ .module = THIS_MODULE,
+ .storage_needed = userui_storage_needed,
+ .save_config_info = userui_save_config_info,
+ .load_config_info = userui_load_config_info,
+ .memory_needed = userui_memory_needed,
+ .post_atomic_restore = userui_post_atomic_restore,
+ .sysfs_data = sysfs_params,
+ .num_sysfs_entries = sizeof(sysfs_params) /
+ sizeof(struct toi_sysfs_data),
+};
+
+static struct ui_ops my_ui_ops = {
+ .update_status = userui_update_status,
+ .message = userui_message,
+ .prepare_status = userui_prepare_status,
+ .abort = userui_abort_hibernate,
+ .cond_pause = userui_cond_pause,
+ .prepare = userui_prepare_console,
+ .cleanup = userui_cleanup_console,
+ .wait_for_key = userui_wait_for_keypress,
+};
+
+/**
+ * toi_user_ui_init - Boot time initialisation for user interface.
+ *
+ * Invoked from the core init routine.
+ */
+static __init int toi_user_ui_init(void)
+{
+ int result;
+
+ ui_helper_data.nl = NULL;
+ strncpy(ui_helper_data.program, CONFIG_TOI_USERUI_DEFAULT_PATH, 255);
+ ui_helper_data.pid = -1;
+ ui_helper_data.skb_size = sizeof(struct userui_msg_params);
+ ui_helper_data.pool_limit = 6;
+ ui_helper_data.netlink_id = NETLINK_TOI_USERUI;
+ ui_helper_data.name = "userspace ui";
+ ui_helper_data.rcv_msg = userui_user_rcv_msg;
+ ui_helper_data.interface_version = 8;
+ ui_helper_data.must_init = 0;
+ ui_helper_data.not_ready = userui_cleanup_console;
+ init_completion(&ui_helper_data.wait_for_process);
+ result = toi_register_module(&userui_ops);
+ if (!result) {
+ result = toi_register_ui_ops(&my_ui_ops);
+ if (result)
+ toi_unregister_module(&userui_ops);
+ }
+
+ return result;
+}
+
+late_initcall(toi_user_ui_init);
diff --git a/kernel/power/user.c b/kernel/power/user.c
new file mode 100644
index 000000000..526e89114
--- /dev/null
+++ b/kernel/power/user.c
@@ -0,0 +1,478 @@
+/*
+ * linux/kernel/power/user.c
+ *
+ * This file provides the user space interface for software suspend/resume.
+ *
+ * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
+ *
+ * This file is released under the GPLv2.
+ *
+ */
+
+#include <linux/suspend.h>
+#include <linux/syscalls.h>
+#include <linux/reboot.h>
+#include <linux/string.h>
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+#include <linux/pm.h>
+#include <linux/fs.h>
+#include <linux/compat.h>
+#include <linux/console.h>
+#include <linux/cpu.h>
+#include <linux/freezer.h>
+
+#include <asm/uaccess.h>
+
+#include "power.h"
+
+
+#define SNAPSHOT_MINOR 231
+
+static struct snapshot_data {
+ struct snapshot_handle handle;
+ int swap;
+ int mode;
+ bool frozen;
+ bool ready;
+ bool platform_support;
+ bool free_bitmaps;
+} snapshot_state;
+
+atomic_t snapshot_device_available = ATOMIC_INIT(1);
+
+static int snapshot_open(struct inode *inode, struct file *filp)
+{
+ struct snapshot_data *data;
+ int error;
+
+ if (!hibernation_available())
+ return -EPERM;
+
+ lock_system_sleep();
+
+ if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
+ error = -EBUSY;
+ goto Unlock;
+ }
+
+ if ((filp->f_flags & O_ACCMODE) == O_RDWR) {
+ atomic_inc(&snapshot_device_available);
+ error = -ENOSYS;
+ goto Unlock;
+ }
+ nonseekable_open(inode, filp);
+ data = &snapshot_state;
+ filp->private_data = data;
+ memset(&data->handle, 0, sizeof(struct snapshot_handle));
+ if ((filp->f_flags & O_ACCMODE) == O_RDONLY) {
+ /* Hibernating. The image device should be accessible. */
+ data->swap = swsusp_resume_device ?
+ swap_type_of(swsusp_resume_device, 0, NULL) : -1;
+ data->mode = O_RDONLY;
+ data->free_bitmaps = false;
+ error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
+ if (error)
+ pm_notifier_call_chain(PM_POST_HIBERNATION);
+ } else {
+ /*
+ * Resuming. We may need to wait for the image device to
+ * appear.
+ */
+ wait_for_device_probe();
+
+ data->swap = -1;
+ data->mode = O_WRONLY;
+ error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
+ if (!error) {
+ error = create_basic_memory_bitmaps();
+ data->free_bitmaps = !error;
+ }
+ if (error)
+ pm_notifier_call_chain(PM_POST_RESTORE);
+ }
+ if (error)
+ atomic_inc(&snapshot_device_available);
+
+ data->frozen = false;
+ data->ready = false;
+ data->platform_support = false;
+
+ Unlock:
+ unlock_system_sleep();
+
+ return error;
+}
+
+static int snapshot_release(struct inode *inode, struct file *filp)
+{
+ struct snapshot_data *data;
+
+ lock_system_sleep();
+
+ swsusp_free();
+ data = filp->private_data;
+ free_all_swap_pages(data->swap);
+ if (data->frozen) {
+ pm_restore_gfp_mask();
+ free_basic_memory_bitmaps();
+ thaw_processes();
+ } else if (data->free_bitmaps) {
+ free_basic_memory_bitmaps();
+ }
+ pm_notifier_call_chain(data->mode == O_RDONLY ?
+ PM_POST_HIBERNATION : PM_POST_RESTORE);
+ atomic_inc(&snapshot_device_available);
+
+ unlock_system_sleep();
+
+ return 0;
+}
+
+static ssize_t snapshot_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *offp)
+{
+ struct snapshot_data *data;
+ ssize_t res;
+ loff_t pg_offp = *offp & ~PAGE_MASK;
+
+ lock_system_sleep();
+
+ data = filp->private_data;
+ if (!data->ready) {
+ res = -ENODATA;
+ goto Unlock;
+ }
+ if (!pg_offp) { /* on page boundary? */
+ res = snapshot_read_next(&data->handle);
+ if (res <= 0)
+ goto Unlock;
+ } else {
+ res = PAGE_SIZE - pg_offp;
+ }
+
+ res = simple_read_from_buffer(buf, count, &pg_offp,
+ data_of(data->handle), res);
+ if (res > 0)
+ *offp += res;
+
+ Unlock:
+ unlock_system_sleep();
+
+ return res;
+}
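+
+/*
+ * Usage sketch (illustrative, not part of this file; store_to_swap() is a
+ * hypothetical writer): a hibernation helper that has opened the device
+ * read-only reads the image out a page at a time; short reads within a page
+ * only advance the offset inside the current page:
+ *
+ *	int fd = open("/dev/snapshot", O_RDONLY);
+ *	char page[4096];
+ *	ssize_t n;
+ *
+ *	(issue SNAPSHOT_FREEZE, then SNAPSHOT_CREATE_IMAGE, here)
+ *
+ *	while ((n = read(fd, page, sizeof(page))) > 0)
+ *		store_to_swap(page, n);
+ */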
+
+static ssize_t snapshot_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *offp)
+{
+ struct snapshot_data *data;
+ ssize_t res;
+ loff_t pg_offp = *offp & ~PAGE_MASK;
+
+ lock_system_sleep();
+
+ data = filp->private_data;
+
+ if (!pg_offp) {
+ res = snapshot_write_next(&data->handle);
+ if (res <= 0)
+ goto unlock;
+ } else {
+ res = PAGE_SIZE - pg_offp;
+ }
+
+ res = simple_write_to_buffer(data_of(data->handle), res, &pg_offp,
+ buf, count);
+ if (res > 0)
+ *offp += res;
+unlock:
+ unlock_system_sleep();
+
+ return res;
+}
+
+static long snapshot_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg)
+{
+ int error = 0;
+ struct snapshot_data *data;
+ loff_t size;
+ sector_t offset;
+
+ if (_IOC_TYPE(cmd) != SNAPSHOT_IOC_MAGIC)
+ return -ENOTTY;
+ if (_IOC_NR(cmd) > SNAPSHOT_IOC_MAXNR)
+ return -ENOTTY;
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (!mutex_trylock(&pm_mutex))
+ return -EBUSY;
+
+ lock_device_hotplug();
+ data = filp->private_data;
+
+ switch (cmd) {
+
+ case SNAPSHOT_FREEZE:
+ if (data->frozen)
+ break;
+
+ printk("Syncing filesystems ... ");
+ sys_sync();
+ printk("done.\n");
+
+ error = freeze_processes();
+ if (error)
+ break;
+
+ error = create_basic_memory_bitmaps();
+ if (error)
+ thaw_processes();
+ else
+ data->frozen = true;
+
+ break;
+
+ case SNAPSHOT_UNFREEZE:
+ if (!data->frozen || data->ready)
+ break;
+ pm_restore_gfp_mask();
+ free_basic_memory_bitmaps();
+ data->free_bitmaps = false;
+ thaw_processes();
+ data->frozen = false;
+ break;
+
+ case SNAPSHOT_CREATE_IMAGE:
+ if (data->mode != O_RDONLY || !data->frozen || data->ready) {
+ error = -EPERM;
+ break;
+ }
+ pm_restore_gfp_mask();
+ error = hibernation_snapshot(data->platform_support);
+ if (!error) {
+ error = put_user(in_suspend, (int __user *)arg);
+ data->ready = !freezer_test_done && !error;
+ freezer_test_done = false;
+ }
+ break;
+
+ case SNAPSHOT_ATOMIC_RESTORE:
+ snapshot_write_finalize(&data->handle);
+ if (data->mode != O_WRONLY || !data->frozen ||
+ !snapshot_image_loaded(&data->handle)) {
+ error = -EPERM;
+ break;
+ }
+ error = hibernation_restore(data->platform_support);
+ break;
+
+ case SNAPSHOT_FREE:
+ swsusp_free();
+ memset(&data->handle, 0, sizeof(struct snapshot_handle));
+ data->ready = false;
+ /*
+ * It is necessary to thaw kernel threads here, because
+ * SNAPSHOT_CREATE_IMAGE may be invoked directly after
+ * SNAPSHOT_FREE. In that case, if kernel threads were not
+ * thawed, the preallocation of memory carried out by
+ * hibernation_snapshot() might run into problems (i.e. it
+ * might fail or even deadlock).
+ */
+ thaw_kernel_threads();
+ break;
+
+ case SNAPSHOT_PREF_IMAGE_SIZE:
+ image_size = arg;
+ break;
+
+ case SNAPSHOT_GET_IMAGE_SIZE:
+ if (!data->ready) {
+ error = -ENODATA;
+ break;
+ }
+ size = snapshot_get_image_size();
+ size <<= PAGE_SHIFT;
+ error = put_user(size, (loff_t __user *)arg);
+ break;
+
+ case SNAPSHOT_AVAIL_SWAP_SIZE:
+ size = count_swap_pages(data->swap, 1);
+ size <<= PAGE_SHIFT;
+ error = put_user(size, (loff_t __user *)arg);
+ break;
+
+ case SNAPSHOT_ALLOC_SWAP_PAGE:
+ if (data->swap < 0 || data->swap >= MAX_SWAPFILES) {
+ error = -ENODEV;
+ break;
+ }
+ offset = alloc_swapdev_block(data->swap);
+ if (offset) {
+ offset <<= PAGE_SHIFT;
+ error = put_user(offset, (loff_t __user *)arg);
+ } else {
+ error = -ENOSPC;
+ }
+ break;
+
+ case SNAPSHOT_FREE_SWAP_PAGES:
+ if (data->swap < 0 || data->swap >= MAX_SWAPFILES) {
+ error = -ENODEV;
+ break;
+ }
+ free_all_swap_pages(data->swap);
+ break;
+
+ case SNAPSHOT_S2RAM:
+ if (!data->frozen) {
+ error = -EPERM;
+ break;
+ }
+ /*
+ * Tasks are frozen and the notifiers have been called with
+ * PM_HIBERNATION_PREPARE
+ */
+ error = suspend_devices_and_enter(PM_SUSPEND_MEM);
+ data->ready = false;
+ break;
+
+ case SNAPSHOT_PLATFORM_SUPPORT:
+ data->platform_support = !!arg;
+ break;
+
+ case SNAPSHOT_POWER_OFF:
+ if (data->platform_support)
+ error = hibernation_platform_enter();
+ break;
+
+ case SNAPSHOT_SET_SWAP_AREA:
+ if (swsusp_swap_in_use()) {
+ error = -EPERM;
+ } else {
+ struct resume_swap_area swap_area;
+ dev_t swdev;
+
+ error = copy_from_user(&swap_area, (void __user *)arg,
+ sizeof(struct resume_swap_area));
+ if (error) {
+ error = -EFAULT;
+ break;
+ }
+
+ /*
+ * User space encodes device types as two-byte values,
+ * so we need to recode them
+ */
+ swdev = new_decode_dev(swap_area.dev);
+ if (swdev) {
+ offset = swap_area.offset;
+ data->swap = swap_type_of(swdev, offset, NULL);
+ if (data->swap < 0)
+ error = -ENODEV;
+ } else {
+ data->swap = -1;
+ error = -EINVAL;
+ }
+ }
+ break;
+
+ default:
+ error = -ENOTTY;
+
+ }
+
+ unlock_device_hotplug();
+ mutex_unlock(&pm_mutex);
+
+ return error;
+}
+
+#ifdef CONFIG_COMPAT
+
+struct compat_resume_swap_area {
+ compat_loff_t offset;
+ u32 dev;
+} __packed;
+
+static long
+snapshot_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ BUILD_BUG_ON(sizeof(loff_t) != sizeof(compat_loff_t));
+
+ switch (cmd) {
+ case SNAPSHOT_GET_IMAGE_SIZE:
+ case SNAPSHOT_AVAIL_SWAP_SIZE:
+ case SNAPSHOT_ALLOC_SWAP_PAGE: {
+ compat_loff_t __user *uoffset = compat_ptr(arg);
+ loff_t offset;
+ mm_segment_t old_fs;
+ int err;
+
+ old_fs = get_fs();
+ set_fs(KERNEL_DS);
+ err = snapshot_ioctl(file, cmd, (unsigned long) &offset);
+ set_fs(old_fs);
+ if (!err && put_user(offset, uoffset))
+ err = -EFAULT;
+ return err;
+ }
+
+ case SNAPSHOT_CREATE_IMAGE:
+ return snapshot_ioctl(file, cmd,
+ (unsigned long) compat_ptr(arg));
+
+ case SNAPSHOT_SET_SWAP_AREA: {
+ struct compat_resume_swap_area __user *u_swap_area =
+ compat_ptr(arg);
+ struct resume_swap_area swap_area;
+ mm_segment_t old_fs;
+ int err;
+
+ err = get_user(swap_area.offset, &u_swap_area->offset);
+ err |= get_user(swap_area.dev, &u_swap_area->dev);
+ if (err)
+ return -EFAULT;
+ old_fs = get_fs();
+ set_fs(KERNEL_DS);
+ err = snapshot_ioctl(file, SNAPSHOT_SET_SWAP_AREA,
+ (unsigned long) &swap_area);
+ set_fs(old_fs);
+ return err;
+ }
+
+ default:
+ return snapshot_ioctl(file, cmd, arg);
+ }
+}
+
+#endif /* CONFIG_COMPAT */
+
+static const struct file_operations snapshot_fops = {
+ .open = snapshot_open,
+ .release = snapshot_release,
+ .read = snapshot_read,
+ .write = snapshot_write,
+ .llseek = no_llseek,
+ .unlocked_ioctl = snapshot_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = snapshot_compat_ioctl,
+#endif
+};
+
+static struct miscdevice snapshot_device = {
+ .minor = SNAPSHOT_MINOR,
+ .name = "snapshot",
+ .fops = &snapshot_fops,
+};
+
+static int __init snapshot_device_init(void)
+{
+ return misc_register(&snapshot_device);
+};
+
+device_initcall(snapshot_device_init);
diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c
new file mode 100644
index 000000000..019069c84
--- /dev/null
+++ b/kernel/power/wakelock.c
@@ -0,0 +1,268 @@
+/*
+ * kernel/power/wakelock.c
+ *
+ * User space wakeup sources support.
+ *
+ * Copyright (C) 2012 Rafael J. Wysocki <rjw@sisk.pl>
+ *
+ * This code is based on the analogous interface allowing user space to
+ * manipulate wakelocks on Android.
+ */
+
+#include <linux/capability.h>
+#include <linux/ctype.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/hrtimer.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/slab.h>
+
+#include "power.h"
+
+static DEFINE_MUTEX(wakelocks_lock);
+
+struct wakelock {
+ char *name;
+ struct rb_node node;
+ struct wakeup_source ws;
+#ifdef CONFIG_PM_WAKELOCKS_GC
+ struct list_head lru;
+#endif
+};
+
+static struct rb_root wakelocks_tree = RB_ROOT;
+
+ssize_t pm_show_wakelocks(char *buf, bool show_active)
+{
+ struct rb_node *node;
+ struct wakelock *wl;
+ char *str = buf;
+ char *end = buf + PAGE_SIZE;
+
+ mutex_lock(&wakelocks_lock);
+
+ for (node = rb_first(&wakelocks_tree); node; node = rb_next(node)) {
+ wl = rb_entry(node, struct wakelock, node);
+ if (wl->ws.active == show_active)
+ str += scnprintf(str, end - str, "%s ", wl->name);
+ }
+ if (str > buf)
+ str--;
+
+ str += scnprintf(str, end - str, "\n");
+
+ mutex_unlock(&wakelocks_lock);
+ return (str - buf);
+}
+
+#if CONFIG_PM_WAKELOCKS_LIMIT > 0
+static unsigned int number_of_wakelocks;
+
+static inline bool wakelocks_limit_exceeded(void)
+{
+ return number_of_wakelocks > CONFIG_PM_WAKELOCKS_LIMIT;
+}
+
+static inline void increment_wakelocks_number(void)
+{
+ number_of_wakelocks++;
+}
+
+static inline void decrement_wakelocks_number(void)
+{
+ number_of_wakelocks--;
+}
+#else /* CONFIG_PM_WAKELOCKS_LIMIT = 0 */
+static inline bool wakelocks_limit_exceeded(void) { return false; }
+static inline void increment_wakelocks_number(void) {}
+static inline void decrement_wakelocks_number(void) {}
+#endif /* CONFIG_PM_WAKELOCKS_LIMIT */
+
+#ifdef CONFIG_PM_WAKELOCKS_GC
+#define WL_GC_COUNT_MAX 100
+#define WL_GC_TIME_SEC 300
+
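+/*
+ * Garbage collection is deliberately lazy: wakelocks_gc() runs from
+ * pm_wake_unlock() and does nothing until WL_GC_COUNT_MAX unlock operations
+ * have accumulated; it then walks the LRU list from its least recently used
+ * end, freeing wakelocks that are inactive and have been idle for more than
+ * WL_GC_TIME_SEC seconds, and stops at the first recently used entry.
+ */
+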
+static LIST_HEAD(wakelocks_lru_list);
+static unsigned int wakelocks_gc_count;
+
+static inline void wakelocks_lru_add(struct wakelock *wl)
+{
+ list_add(&wl->lru, &wakelocks_lru_list);
+}
+
+static inline void wakelocks_lru_most_recent(struct wakelock *wl)
+{
+ list_move(&wl->lru, &wakelocks_lru_list);
+}
+
+static void wakelocks_gc(void)
+{
+ struct wakelock *wl, *aux;
+ ktime_t now;
+
+ if (++wakelocks_gc_count <= WL_GC_COUNT_MAX)
+ return;
+
+ now = ktime_get();
+ list_for_each_entry_safe_reverse(wl, aux, &wakelocks_lru_list, lru) {
+ u64 idle_time_ns;
+ bool active;
+
+ spin_lock_irq(&wl->ws.lock);
+ idle_time_ns = ktime_to_ns(ktime_sub(now, wl->ws.last_time));
+ active = wl->ws.active;
+ spin_unlock_irq(&wl->ws.lock);
+
+ if (idle_time_ns < ((u64)WL_GC_TIME_SEC * NSEC_PER_SEC))
+ break;
+
+ if (!active) {
+ wakeup_source_remove(&wl->ws);
+ rb_erase(&wl->node, &wakelocks_tree);
+ list_del(&wl->lru);
+ kfree(wl->name);
+ kfree(wl);
+ decrement_wakelocks_number();
+ }
+ }
+ wakelocks_gc_count = 0;
+}
+#else /* !CONFIG_PM_WAKELOCKS_GC */
+static inline void wakelocks_lru_add(struct wakelock *wl) {}
+static inline void wakelocks_lru_most_recent(struct wakelock *wl) {}
+static inline void wakelocks_gc(void) {}
+#endif /* !CONFIG_PM_WAKELOCKS_GC */
+
+static struct wakelock *wakelock_lookup_add(const char *name, size_t len,
+ bool add_if_not_found)
+{
+ struct rb_node **node = &wakelocks_tree.rb_node;
+ struct rb_node *parent = *node;
+ struct wakelock *wl;
+
+ while (*node) {
+ int diff;
+
+ parent = *node;
+ wl = rb_entry(*node, struct wakelock, node);
+ diff = strncmp(name, wl->name, len);
+ if (diff == 0) {
+ if (wl->name[len])
+ diff = -1;
+ else
+ return wl;
+ }
+ if (diff < 0)
+ node = &(*node)->rb_left;
+ else
+ node = &(*node)->rb_right;
+ }
+ if (!add_if_not_found)
+ return ERR_PTR(-EINVAL);
+
+ if (wakelocks_limit_exceeded())
+ return ERR_PTR(-ENOSPC);
+
+ /* Not found, we have to add a new one. */
+ wl = kzalloc(sizeof(*wl), GFP_KERNEL);
+ if (!wl)
+ return ERR_PTR(-ENOMEM);
+
+ wl->name = kstrndup(name, len, GFP_KERNEL);
+ if (!wl->name) {
+ kfree(wl);
+ return ERR_PTR(-ENOMEM);
+ }
+ wl->ws.name = wl->name;
+ wakeup_source_add(&wl->ws);
+ rb_link_node(&wl->node, parent, node);
+ rb_insert_color(&wl->node, &wakelocks_tree);
+ wakelocks_lru_add(wl);
+ increment_wakelocks_number();
+ return wl;
+}
+
+int pm_wake_lock(const char *buf)
+{
+ const char *str = buf;
+ struct wakelock *wl;
+ u64 timeout_ns = 0;
+ size_t len;
+ int ret = 0;
+
+ if (!capable(CAP_BLOCK_SUSPEND))
+ return -EPERM;
+
+ while (*str && !isspace(*str))
+ str++;
+
+ len = str - buf;
+ if (!len)
+ return -EINVAL;
+
+ if (*str && *str != '\n') {
+ /* Find out if there's a valid timeout string appended. */
+ ret = kstrtou64(skip_spaces(str), 10, &timeout_ns);
+ if (ret)
+ return -EINVAL;
+ }
+
+ mutex_lock(&wakelocks_lock);
+
+ wl = wakelock_lookup_add(buf, len, true);
+ if (IS_ERR(wl)) {
+ ret = PTR_ERR(wl);
+ goto out;
+ }
+ if (timeout_ns) {
+ u64 timeout_ms = timeout_ns + NSEC_PER_MSEC - 1;
+
+ do_div(timeout_ms, NSEC_PER_MSEC);
+ __pm_wakeup_event(&wl->ws, timeout_ms);
+ } else {
+ __pm_stay_awake(&wl->ws);
+ }
+
+ wakelocks_lru_most_recent(wl);
+
+ out:
+ mutex_unlock(&wakelocks_lock);
+ return ret;
+}
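+
+/*
+ * Example (illustrative): writing "mylock 500000000" to /sys/power/wake_lock
+ * creates (or reuses) the wakelock "mylock" and arms it for roughly 500 ms,
+ * the second field being an optional timeout in nanoseconds rounded up to
+ * milliseconds; writing "mylock" alone holds the lock until a matching write
+ * to /sys/power/wake_unlock.
+ */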
+
+int pm_wake_unlock(const char *buf)
+{
+ struct wakelock *wl;
+ size_t len;
+ int ret = 0;
+
+ if (!capable(CAP_BLOCK_SUSPEND))
+ return -EPERM;
+
+ len = strlen(buf);
+ if (!len)
+ return -EINVAL;
+
+ if (buf[len-1] == '\n')
+ len--;
+
+ if (!len)
+ return -EINVAL;
+
+ mutex_lock(&wakelocks_lock);
+
+ wl = wakelock_lookup_add(buf, len, false);
+ if (IS_ERR(wl)) {
+ ret = PTR_ERR(wl);
+ goto out;
+ }
+ __pm_relax(&wl->ws);
+
+ wakelocks_lru_most_recent(wl);
+ wakelocks_gc();
+
+ out:
+ mutex_unlock(&wakelocks_lock);
+ return ret;
+}